In [1]:
from termcolor import colored
from sklearn.tree import DecisionTreeClassifier
import missingno as msno
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
import pickle
import pprint
from sklearn.ensemble import RandomForestRegressor
from pandas_profiling import ProfileReport
from dateutil import relativedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from statsmodels.regression.linear_model import OLS

# Global switch read by the plot_* helpers below: when it is falsy they
# return immediately without drawing anything.
plot_______ = False
# NOTE(review): this second assignment overrides the one above, so plotting
# is currently enabled; keep only one of the two lines.
plot_______ = True

def new_line():
    """Print a dashed separator line with a blank line before and after it."""
    separator = "\n-------------------------\n"
    print(separator)

def RMSE(predictions, actual=None):
    """Return the root-mean-squared error, rounded to the nearest integer.

    Parameters
    ----------
    predictions : array-like
        Predicted target values.
    actual : array-like, optional
        True target values. When omitted, the module-level ``test_y`` is
        used, which is what the function always did before this parameter
        existed.

    Returns
    -------
    int
        ``round(sqrt(mean((actual - predictions) ** 2)))``.
    """
    if actual is None:
        # Backward-compatible fallback. Note that ``test_y`` is a global that
        # other parts of the script also assign, so passing ``actual``
        # explicitly is the safer call style.
        actual = test_y
    return round(np.sqrt(((actual - predictions) ** 2).mean()))

def cluping_rare_cases_in_one_catagory(x):
    """Lump infrequent categories of one column of the global ``df`` together.

    Categories that occur fewer than 10 times are replaced by the label
    "Rare cases". If the least frequent category after that still has fewer
    than 8 rows, the "Rare cases" rows are replaced by the most common value
    instead. If only one distinct value remains, the column is dropped from
    ``df`` in place and a message is printed.

    Parameters
    ----------
    x : str
        Name of the column of the global ``df`` to process.

    Returns
    -------
    pandas.Series or None
        The transformed column, or ``None`` when the column was dropped.
    """
    global df
    column = df[x]
    original = column.copy(deep=True)

    counts = column.value_counts()
    rare_labels = counts[counts < 10].index.to_list()
    # Always work on a new Series so the column inside df is never modified
    # through a shared reference.
    column = column.replace(rare_labels, "Rare cases") if rare_labels else column.copy()

    merged_counts = column.value_counts()
    # value_counts() sorts descending, so the last entry is the least frequent
    # category. Use positional .iloc (a plain [-1] is a label lookup on this
    # index) and skip columns that contain no values at all.
    if not merged_counts.empty and merged_counts.iloc[-1] < 8:
        # If the "Rare cases" category itself has fewer than 8 values,
        # replace them with the most common value.
        column[column == "Rare cases"] = column.mode()[0]

    if column.nunique() == 1:
        new_line()
        to_print = f"The column <{column.name}> have imbalanced, so we droped it, it has {original.nunique()} unique values, and most commont value frequency ratio is {(original == original.mode()[0]).mean()}"
        print(colored(to_print, 'red'))
        df.drop(columns=column.name, inplace=True)
        return None
    return column

def plot_numerical_columns(col_name):
    """Draw four diagnostic plots for a numeric column of the global ``df``.

    The plots are a histogram, the values in index order, the values sorted
    ascending, and a box plot. The first three get a red mean line and a
    green median line. Nothing is drawn when ``plot_______`` is falsy.

    Parameters
    ----------
    col_name : str
        Name of the numeric column to plot.
    """
    if not plot_______:
        return None

    values = df[col_name]
    mean_value = values.mean()
    median_value = values.median()

    def mark_center(vertical):
        # A histogram has the data values on the x-axis, so its reference
        # lines must be vertical; the line plots have the values on the
        # y-axis and need horizontal lines.
        draw = plt.axvline if vertical else plt.axhline
        draw(mean_value, color='red', label='Mean')
        draw(median_value, color='green', label='Median')
        # Labels are attached to the artists, so the legend cannot mix them up.
        plt.legend()

    # Histogram
    values.plot(kind="hist", figsize=(13, 8), label='Actual')
    plt.title(col_name, size=18)
    mark_center(vertical=True)
    plt.show()

    # Values in index order
    values.plot(figsize=(13, 8), label='Actual')
    plt.title(col_name, size=18)
    mark_center(vertical=False)
    plt.show()

    # Values sorted ascending, plotted against their rank
    values.sort_values().reset_index(drop=True).plot(figsize=(13, 8), label='Actual')
    plt.title(col_name + " (SORTED)", size=18)
    mark_center(vertical=False)
    plt.show()

    # Box plot
    values.plot(kind="box", figsize=(13, 8))
    plt.title(col_name, size=18)
    plt.xlabel("")
    plt.show()

def plot_date_columns(col_name):
    """Plot a datetime column of the global ``df`` five different ways.

    Draws the raw values, the sorted values, and bar charts with the
    percentage of rows per year, per month and per day of month. Nothing is
    drawn when ``plot_______`` is falsy.
    """
    if not plot_______:
        return None

    dates = df[col_name]

    # Raw values against the index.
    dates.plot(figsize=(15, 7), grid=True)
    plt.xlabel("Index", size=14)
    plt.ylabel("Date", size=14)
    plt.title(col_name + " Graph", size=18)
    plt.show()

    # Sorted values against their rank.
    dates.sort_values().reset_index(drop=True).plot(figsize=(15, 7), grid=True)
    plt.xlabel("Index (sorted)", size=14)
    plt.ylabel("Year", size=14)
    plt.title(col_name + " Graph", size=18)
    plt.show()

    # One frequency bar chart per calendar component.
    components = (
        ("year", "Year", " year Frequency Graph"),
        ("month", "Month", " month Frequency Graph"),
        ("day", "Day", " Day Frequency Graph"),
    )
    for attribute, axis_label, title_suffix in components:
        part = getattr(df[col_name].dt, attribute)
        share = part.value_counts().sort_index() / len(df) * 100
        share.plot(kind="bar", figsize=(15, 7), grid=True)
        plt.xlabel(axis_label, size=14)
        plt.ylabel("Ratio (1-100)", size=14)
        plt.title(col_name + title_suffix, size=18)
        plt.show()

def plot_catagorical_columns(cat_variable):
    """Bar-plot the percentage of rows per category of a column of ``df``.

    Nothing is drawn when ``plot_______`` is falsy.
    """
    if plot_______:
        category_share = df[cat_variable].value_counts() / len(df) * 100
        category_share.plot.bar(figsize=(15, 6), grid=True)
        plt.title(cat_variable, size=18, color='r')
        plt.xlabel("Catagory", size=14, color='r')
        plt.ylabel("Ratio (1-100)", size=14, color='r')
        plt.show()
    return None

def data_shape():
    """Return a short text describing the row and column count of the global ``df``."""
    n_rows, n_columns = df.shape[0], df.shape[1]
    return f"The Data have:\n\t{n_rows} rows\n\t{n_columns} columns\n"
#===
# df = pd.read_csv("data.csv", date_parser=True)

# df = pd.read_csv("df_only_selected_columns_using_PCA.csv", date_parser=True)
# target_variable = "ACTUAL_WORTH"
# df = pd.concat([
#         df.select_dtypes("number").iloc[:, :3],
#         df.select_dtypes("O").iloc[:, :3],
#         df.select_dtypes(exclude=["number", "O"]),
#         df[[target_variable]]], 1)
# target_variable = "AREA_NAME_EN"

# df = pd.read_csv("cleaned_data.csv", date_parser=True)
# target_variable = "SalePrice"

# Load the train and test CSVs (hard-coded local paths) and stack them into
# one frame named df.
train = pd.read_csv("/home/amir/Downloads/train.csv")
test  = pd.read_csv("/home/amir/Downloads/test.csv")
target_variable = "SalePrice"
# Detach the target before concatenating.
# NOTE(review): presumably test.csv has no target column -- confirm.
train_y = train[target_variable]
train = train.drop(columns=target_variable)
# NOTE: no ignore_index is passed, so the row labels of train and test both
# appear in df.
df = pd.concat([train, test])
# Re-attach the target by position: the real values for the train rows and
# None for every test row.
df[target_variable] = train_y.to_list() + [None]*len(test)
#===
# ---- Overview: shape of the data
new_line()
print(data_shape())

# ---- Overview: how many columns there are of each dtype
new_line()
dtype_counts = df.dtypes.value_counts()
print(f"Columns types distribution:\n\n{dtype_counts}\n")
dtype_counts.plot(kind='barh', figsize=(10, 2), grid=True, title="Variable types Count Graph")
plt.xlabel("Count")
plt.show()
del dtype_counts

# ---- Drop the rows whose target value is missing
missing_targets = df[target_variable].isna().sum()
if missing_targets > 0:
    new_line()
    to_print = f"There are {missing_targets} NAs in target values, we droped those rows"
    print(colored(to_print, 'red'))
    df = df.loc[df[target_variable].notna()]
del missing_targets
#---------------------------------------------------
# df.select_dtypes("O").columns[:5]
# D = df.select_dtypes(exclude="O")
# D2 = df.select_dtypes("O").iloc[:,:5]
# df = pd.concat([D, D2], 1)

# profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True)
# profile.to_file("your_report.html")
#---------------------------------------- NA
# Per-column count of missing values, keeping only the columns that have at
# least one.
a = df.isna().sum().where(lambda x:x>0).dropna()
if a.size:
    new_line()
    to_print = f"There are {len(a)} (out of {df.shape[1]}, [{round(len(a)/df.shape[1]*100)}%]) columns that contains 1 or more NA."
    print(colored(to_print, 'red'))

    # For every column with NAs, add a "<column>_NA_indicator" text column
    # holding "Missing" / "Not missing" per row.
    for i in a.index:
        df[i+"_NA_indicator"] = df[i].isna().replace({True : "Missing", False : "Not missing"})
    new_line()
    to_print = f"{a.size} NA_indicator variables added to the data\n"
    print(colored(to_print, 'red'))


    print("========= NA Graphs =========\n")
    # Two visualisations of where the missing cells are.
    msno.matrix(df);
    plt.title("NA Graph");
    plt.show()

    new_line()
    sns.heatmap(df.isnull(), cbar=False);
    plt.title("NA Graph");
    plt.show()
#===
# Turn the counts into the percentage (0-100) of rows that are missing,
# sorted ascending.
a = a.sort_values()/len(df)*100
if (a == 100).sum():
    new_line()
    # Columns that are entirely missing are dropped from df.
    df.drop(columns=a[a==100].index, inplace=True)
    to_print = f"There are {(a == 100).sum()} columns that are all Missing values, so we droped those.\nNow {data_shape()}\n\nDropped columns names:"
    print(colored(to_print, 'red'))
    for i in a[a==100].index:
        print("\t",i)
    # Keep only the partially-missing columns for the reports below.
    a = a[a != 100]
#===
# dtype distribution of the columns that still contain missing values.
x = df[a.index].dtypes.value_counts()
if x.size:
    new_line()
    print(f"NA columns data type Distribution:\n\n{x}")
del x
#===
new_line()
if a.size:
    print(f"NaN Ratio (0-100)\n\n{a}")
else:
    print(colored("Now There is no NaN value in our Data", 'red'))
#===
# ----------------------------------------------- Imputing Missing values
# ------------------------------------ Numerical columns imputing
# Runs only when at least one numeric cell is missing.
if df.select_dtypes("number").isna().sum().sum():
    new_line()
    print(f'(Before Missing values treatment)\nThere are {df.isna().sum().sum()} Missing values:\n\t{df.select_dtypes("O").isna().sum().sum()} in catagorical variables\n\t{df.select_dtypes("number").isna().sum().sum()} in numerical columns\n\t{df.select_dtypes(exclude=["O", "number"]).isna().sum().sum()} in others')
    from sklearn.impute import KNNImputer
    # Split df into its non-numeric and numeric parts; only the numeric part
    # is imputed.
    df_not_a_number  = df.select_dtypes(exclude="number")
    df_number        = df.select_dtypes("number")
    del df
    imputer = KNNImputer(n_neighbors=4, weights="uniform")
    imputed = imputer.fit_transform(df_number)
    # fit_transform returns a plain array, so rebuild a frame with the
    # original numeric column names (this gets a fresh 0..n-1 index).
    df_number = pd.DataFrame(imputed, columns=df_number.columns)
    # Both halves are re-indexed 0..n-1 so they line up row by row; in the
    # rebuilt df the non-numeric columns come first.
    df = pd.concat([df_not_a_number.reset_index(drop=True), df_number.reset_index(drop=True)], axis=1)
    del df_not_a_number
    del df_number
    print(f'\n(After filling numeric missing values)\nThere are {df.isna().sum().sum()} Missing values:\n\t{df.select_dtypes("O").isna().sum().sum()} in catagorical variables\n\t{df.select_dtypes("number").isna().sum().sum()} in numerical columns\n\t{df.select_dtypes(exclude=["O", "number"]).isna().sum().sum()} in others')
#===
# -------------------------------- Catagoriacal variables imputating
# Object columns that contain missing values, least-missing first, so that a
# column filled early can serve as a predictor for the later ones.
vars_to_fill = df.select_dtypes("O").isna().mean().where(lambda x:x>0).dropna().sort_values(ascending=True)
if vars_to_fill.size:
    for col in vars_to_fill.index:
        # Predictors: every column that currently has no missing value
        # (recomputed each pass because filled columns become complete).
        complete_columns = df.loc[:, df.isna().sum() == 0]
        # Numeric predictors as-is plus one-hot encoded non-numeric ones.
        # axis is passed by keyword: the positional form is rejected by
        # pandas >= 2.0.
        imputation_X = pd.concat(
            [
                complete_columns.select_dtypes("number"),
                pd.get_dummies(complete_columns.select_dtypes(exclude="number"), prefix_sep="__"),
            ],
            axis=1,
        )

        # Fit on the rows where the column is known, predict the others.
        # Local names are used on purpose so the script's train/test globals
        # are not overwritten by this loop.
        known_rows = df[col].notna()
        imputation_clf = DecisionTreeClassifier().fit(imputation_X[known_rows], df.loc[known_rows, col])
        df.loc[~known_rows, col] = imputation_clf.predict(imputation_X[~known_rows])
    new_line()
    print(f"Missing values imputed, Now there are {df.isna().sum().sum()} Missing values")
# ----------------------------------------------- END Imputing Missing values
# --------------------------------------------------------- Unique values
# Columns with a single distinct value carry no information: drop them.
only_one_unique_value = df.nunique().where(lambda x:x == 1).dropna()
if only_one_unique_value.size:
    new_line()
    df.drop(columns=only_one_unique_value.index, inplace=True)
    last_ = ("", "it") if  only_one_unique_value.size == 1 else ("s", "those")
    to_print = f"There are {only_one_unique_value.size} variable{last_[0]} That have only one unique value, so we droped {last_[1]}.\nDropped column{last_[0]} name{last_[0]} (in order):"
    print(colored(to_print, 'red'))
    for i in only_one_unique_value.index.sort_values():
        print(i)
    new_line()
    print(f"\nNow {data_shape()}")
del only_one_unique_value
# #===
# Columns in which no value repeats (identifier-like columns) are dropped too.
# The target is excluded from this check: a continuous target can easily have
# all-distinct values, and dropping it would break every later step.
all_values_are_unique = (
    df.apply(lambda x:x.is_unique)
    .where(lambda x:x==True)
    .dropna()
    .drop(labels=target_variable, errors="ignore")
)
if all_values_are_unique.size:
    new_line()
    df.drop(columns=all_values_are_unique.index, inplace=True)
    last_ = ("", "it") if  all_values_are_unique.size == 1 else ("s", "those")
    to_print = f"There are {all_values_are_unique.size} column{last_[0]} that have all unique values, so no value repeatation, we droped {last_[1]} column{last_[0]}.\nDropped column{last_[0]} name{last_[0]} are:\n"
    print(colored(to_print, 'red'))
    for i in all_values_are_unique.index:
        print("\t", i)
    new_line()
    print(f"Now {data_shape()}")
del all_values_are_unique
#===
date_columns = []
def DTYPES():
    """Classify the columns of the global ``df`` as Object, Number or Date.

    Every object column that ``pd.to_datetime`` can parse is converted to
    datetime inside ``df`` and recorded in the global ``date_columns``.
    Warnings are printed when a column name lands in more than one group or
    when a column of ``df`` belongs to none.

    Returns
    -------
    pandas.DataFrame
        One row per classified column with the columns "Column" and "dtype",
        where "dtype" is one of "Object", "Number" or "Date".
    """
    global date_columns
    catagorical_columns = df.head().select_dtypes("O").columns
    numerical_columns   = df.head().select_dtypes("number").columns
    date_columns        = []

    for i in catagorical_columns:
        try:
            parsed = pd.to_datetime(df[i])
        except (ValueError, TypeError, OverflowError):
            # Not parseable as dates: leave the column as it is.
            continue
        df[i] = parsed
        date_columns.append(i)

    catagorical_columns = catagorical_columns.drop(date_columns)
    if date_columns:
        date_columns = pd.Index(date_columns)

    # Build the column/dtype table once and reuse it for the checks below.
    all_columns = catagorical_columns.append(numerical_columns).append(pd.Index(date_columns))
    dtypes = pd.DataFrame({
        "Column": all_columns,
        "dtype": ['Object']*len(catagorical_columns) + ['Number']*len(numerical_columns) + ['Date']*len(date_columns),
    })
    #===
    if not all_columns.is_unique:
        new_line()
        print(colored("Some column/s repated in > 1 dtypes\n", 'red'))
        repeated = dtypes.loc[dtypes.Column.duplicated(), "Column"]
        print(dtypes[dtypes.Column.isin(list(repeated.values))].to_string())
    #===
    x = df.columns.difference(all_columns)
    if x.size:
        new_line()
        print(colored("Some columns not included in any existing catagory, those:\n", 'red'))
        for i in x:
            print(f"\t<{i}, with dtype of <{df[i].dtype}>")
    #===
    return dtypes
#===
# First classification pass; as a side effect DTYPES() also converts
# date-like object columns in df and fills the global date_columns.
dtypes = DTYPES()
# ----------------------------------------------------------------------- Feature enginearing
# ======= Adding date columns
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> add polynomial, sqrt, tree, log features
def add_new_date_cols(x, suffix):
    """Derive calendar features from a datetime column and add them to ``df``.

    Week, year, day, hour and month features are computed from ``x``; each
    one that has more than one distinct value is written into the global
    ``df`` under the name ``suffix + "_<feature>"``.

    Parameters
    ----------
    x : pandas.Series
        Datetime column; its ``name`` must be a column of the global ``df``.
    suffix : str
        Text put in front of every generated column name.

    Returns
    -------
    pandas.DataFrame
        The global ``df`` with the new features and without the source
        column ``x.name``.
    """
    def quoted(values):
        # Render whole numbers as '"<int>"' text so they are treated as
        # categories later on; missing values stay NaN.
        as_text = values.apply(lambda v: np.nan if pd.isna(v) else str(int(v)))
        return '"' + as_text + '"'

    # Series.dt.week was removed in pandas 2.0; isocalendar() is its
    # replacement. Cast to float so missing dates become NaN.
    iso_week = x.dt.isocalendar().week.astype("float64")

    d = {}
    d[suffix + '_week_normalized'] = iso_week / 52
    d[suffix + '_week_str'] = quoted(iso_week)

    d[suffix + '_year_after_min_year'] = x.dt.year - x.dt.year.min()
    d[suffix + '_year_str'] = quoted(x.dt.year)

    d[suffix + '_day_name'] = x.dt.day_name()

    # Number of whole days since the earliest date, taken from the text form
    # of the timedelta ("<n> days ...").
    d[suffix + '_day_after_min_date_str'] = '"' + (x - x.min()).apply(lambda delta: str(delta).split()[0]) + '"'

    d[suffix + '_day_normalized'] = x.dt.day / 31

    d[suffix + '_hour_normalized'] = x.dt.hour / 24
    d[suffix + '_hour_str'] = quoted(x.dt.hour)

    d[suffix + '_month_name'] = x.dt.month_name()
    d[suffix + '_month_normalized'] = x.dt.month / 12

    # Only keep features that actually vary.
    for k, v in d.items():
        if v.nunique() > 1:
            df[k] = v
    return df.drop(columns=x.name)
    # return df

# Replace every detected date column by its derived calendar features.
len_df_before_adding_date_vars = df.shape[1]
for date_col in date_columns:
    df = add_new_date_cols(df[date_col], date_col)
len_df_after_adding_date_vars  = df.shape[1]
if len_df_after_adding_date_vars > len_df_before_adding_date_vars:
    new_line()
    to_print = f"Added {len_df_after_adding_date_vars - len_df_before_adding_date_vars} date Features"
    print(colored(to_print, 'red'))

# ======= type casting of numerical variable (those who have < 4% unique values) to catagorical variables
# The target is left out: a "<target>_str" copy would sit among the
# predictors and leak the label into the models.
numeric_predictors = df.select_dtypes("number").drop(columns=target_variable, errors="ignore")
low_cardinality_columns = (numeric_predictors.nunique() / len(df) * 100).where(lambda x:x<4).dropna().index
if low_cardinality_columns.size:
    n_columns_before = df.shape[1]
    for col_num_to_str in low_cardinality_columns:
        # Quoted text so the value is handled as a category from here on.
        df[col_num_to_str+"_str"] = '"' + df[col_num_to_str].astype(str) + '"'
    n_columns_after = df.shape[1]
    new_line()
    to_print = f"Added {n_columns_after - n_columns_before} String Features (Extracted from numerical variables)"
    print(colored(to_print, 'red'))
# =======
# Lump rare categories in every object column; the helper returns None when
# it dropped the column instead.
for var in df.select_dtypes("O").columns:
    m = cluping_rare_cases_in_one_catagory(var)
    if m is not None:
        df[var] = m
new_line()

# How many cells per column ended up in the "Rare cases" category.
rare_counts = (df == 'Rare cases').sum().sort_values().where(lambda x:x>0).dropna()
rare_ratio = (rare_counts / len(df) * 100).round(4)
xx = pd.DataFrame({"Count" : rare_counts, "Ratio" : rare_ratio})
del rare_counts, rare_ratio
print(f"<Rare case> catagory:\n{xx.to_string()}")
# ----------------------------------------------------------------------- END (Feature enginearing)
# Re-classify the columns now that features were added and removed.
dtypes = DTYPES()
# ---------------------------------------------------- Correlation plot
new_line()
cor_df = df.select_dtypes('number').corr().abs()
# Hide the upper triangle (diagonal included): it mirrors the lower one.
mask = np.triu(np.ones_like(cor_df, dtype=bool))
f, ax = plt.subplots(figsize=(17, 10))
cmap = sns.color_palette("viridis", as_cmap=True)
plot_ = sns.heatmap(
    cor_df,
    mask=mask,
    cmap=cmap,
    vmax=.3,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
)
plot_.axes.set_title("abs (Correlation) plot", fontsize=25)
plt.show()
# ---------------------------------------------------------------------
#===
# m = 0
# Per-column report: duplicate detection, drops of useless columns, summary
# statistics and plots, chosen by the type assigned in DTYPES().
for column_name, type_ in dtypes.itertuples(index=False):
    x = df[column_name]
    to_print = f"\n\n\n========================================= {column_name} =========================================\n\n"
    print(colored(to_print, 'red'))

    # Flag columns that map one-to-one onto this column.
    n_unique_here = x.nunique()
    for col_ in df.columns:
        if col_ == column_name:
            continue
        if df[col_].nunique() == n_unique_here:
            unique_combination = df[[col_, column_name]].drop_duplicates()
            if unique_combination.apply(lambda x:x.is_unique).sum() == 2:
                new_line()
                to_print = f"This Columns is duplicate of <{col_}> column"
                print(colored(to_print, 'red'))

    print(f"Column Type     : ", end="")
    print(colored(type_, 'red'))
    if x.isna().all():
        new_line()
        df.drop(columns=column_name, inplace=True)
        print(colored("We dropped This column, because it is all Empty", 'red'))
        continue
    # DTYPES() labels object columns "Object" (not "O").
    if type_ in ["Object", "Date"]:
        if x.is_unique:
            new_line()
            df.drop(columns=column_name, inplace=True)
            to_print = f"We dropped This column, because it's a {type_} columns, and it's all values are unique"
            print(colored(to_print, 'red'))
            continue
    if x.nunique() == 1:
        new_line()
        df.drop(columns=column_name, inplace=True)
        print(colored("We dropped This column, because There is only one unique value", 'red'))
        continue

    if type_ == "Number":
        # Absolute correlations with every other numeric column, ascending.
        local_cor = cor_df[column_name].drop(column_name).reset_index()
        local_cor = local_cor.reindex(local_cor[column_name].abs().sort_values().index)
        if local_cor[column_name].max() == 1:
            new_line()
            to_print = f"This column is perfactly correlated with column <{local_cor[local_cor[column_name] == 1]['index'].values[0]}, so remove one of them"
            print(colored(to_print, 'red'))

        new_line()
        xm = local_cor[-3:].rename(columns={'index' : 'Column name', column_name : 'Correlation'}).reset_index(drop=True)
        xm.index = xm['Column name']
        xm.drop(columns="Column name", inplace=True)
        xm.plot(kind='barh', grid=True, figsize=(10,1.5))
        plt.title("Most 3 correlated features with this columns (sorted)", size=14)
        plt.xlabel("Correlation", size=12)
        plt.show()

        new_line()
        skewness = x.skew(skipna = True)
        if abs(skewness) < 0.5:
            print(f"The data is fairly symmetrical (skewness is: {skewness})")
        elif abs(skewness) < 1:
            print(f"The data are moderately skewed (skewness is: {skewness})")
        else:
            to_print = f"The data are highly skewed (skewness is: {skewness})\nNote: When skewness exceed |1| we called it highly skewed"
            print(colored(to_print, 'red'))

        # Summary table; outliers are values more than 3 standard deviations
        # away from the mean.
        ff = [x.count(), x.isna().sum(), x.mean(), x.std(), x.min()]
        ff += x.quantile([.25,.5,.75]).to_list()
        ff += [x.max(), x.nunique(), (((x - x.mean())/x.std()).abs() > 3).sum(), (x < 0).sum(), (x == 0).sum()]

        f = pd.DataFrame(ff, index=['Count', 'NA', 'Mean', 'Std', 'Min', '25%', '50%', '75%', 'Max', 'Nunique', 'Outlies', 'Nagetive', 'Zeros'], columns=['Count'])
        f['Ratio'] = f.Count / x.count() * 100
        # A ratio is meaningless for the location/scale statistics.
        f.loc['Mean' : 'Max', 'Ratio'] = None

        new_line()
        print(f.round(2).to_string())
        plot_numerical_columns(column_name)

    elif type_ == "Object":
        value_counts = x.value_counts()
        counts = (
            x.count(),
            x.nunique(),
            len(x),
            x.isna().sum(),
            (x == x.mode().values[0]).sum(),
            (x == value_counts.tail(1).index[0]).sum(),
            value_counts.where(lambda x:x==1).dropna().size,
        )
        f = pd.DataFrame(counts, index=['Count', 'Nunique', 'Len', 'NA', 'Most frequent', 'Least frequent', 'Values occured only once'], columns=['Counts'])
        f['Ratio'] = (f.Counts / x.count() * 100).round(4)
        f.loc[['Len'], 'Ratio'] = None

        new_line()
        print(f.to_string())


        # Values that differ only by letter case or surrounding whitespace.
        if x.str.lower().nunique() != x.nunique():
            new_line()
            to_print = f"Case issue\n\tin orignal variable There are {x.nunique()} unique values\n\tin lower verstion there are   {x.str.lower().nunique()} unique values.\n"
            print(colored(to_print, 'red'))

        if x.str.strip().nunique() != x.nunique():
            new_line()
            to_print = f"Space issue\n\tin orignal variable There are {x.nunique()} unique values\n\tin striped verstion there are {x.str.strip().nunique()} unique values."
            print(colored(to_print, 'red'))

        plot_catagorical_columns(column_name)

    # This compared the builtin ``type`` with "Date" before, so the branch
    # could never run.
    elif type_ == "Date":

        new_line()
        rd = relativedelta.relativedelta( pd.to_datetime(x.max()), pd.to_datetime(x.min()))
        to_print = f"Diffrenece between first and last date:\n\tYears : {rd.years}\n\tMonths: {rd.months}\n\tDays  : {rd.days}"
        print(colored(to_print, 'red'))

        value_counts = x.value_counts()
        ff = (
            x.count(),
            x.nunique(),
            (x == x.mode().values[0]).sum(),
            (x == value_counts.tail(1).index[0]).sum(),
            value_counts.where(lambda x:x==1).dropna().size,
        )
        f = pd.DataFrame(ff, index=["Count", 'Nunique', 'Most frequent values', 'Least frequent values', 'Values occured only once count'], columns=['Counts'])
        f['Ratio'] = (f.Counts / x.count() * 100).round(4)

        new_line()
        print(f"\n{f.to_string()}")


        # Report calendar values between the observed min and max that never
        # occur. sorted() makes the "(in order)" in the message true; a set
        # has no defined order.
        for period_label, period in (("Years", x.dt.year), ("Months", x.dt.month), ("Days", x.dt.day)):
            f = set(np.arange(period.min(), period.max()+1)).difference(period.unique())
            if f:
                new_line()
                print(colored(f"These {period_label} (in order) are missing:\n", 'red'))
                for i in sorted(f):
                    print("\t", i, end=", ")

        new_line()
        plot_date_columns(column_name)


# ================================================================================================================ Modeling
print("\n\n")
print("----------------------------------------------------------------------------------------------")
print("****************************************** Modeling ******************************************")

# Regression problem
# Any real numeric dtype counts (not just the default float/int widths);
# booleans are excluded because they describe classes, not quantities.
if pd.api.types.is_numeric_dtype(df[target_variable]) and not pd.api.types.is_bool_dtype(df[target_variable]):

    print("\n-------------------- This is Regression problem --------------------\n")
    print("''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")

    # Design matrix: numeric columns as-is plus one-hot encoded non-numeric
    # columns. dtype=float keeps the dummies numeric; bool dummies (the
    # default on recent pandas) turn the matrix into object dtype, which
    # statsmodels OLS rejects.
    df = pd.concat(
        [
            df.select_dtypes("number"),
            pd.get_dummies(df.select_dtypes(exclude="number"), prefix_sep="__", dtype=float),
        ],
        axis=1,
    )
    # ====
    train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])
    # ====
    # --------------------------------------------------------- Linear regression
    to_print = "\n ------------------------------------- Linear Regression -------------------------------------\n"
    print(colored(to_print, 'red'))

    # NOTE(review): no constant column is added before fitting, so the model
    # has an intercept only if train_X already spans one -- confirm this is
    # intended.
    model_reg = OLS(train_y, train_X).fit()
    summary = model_reg.summary()
    # tables[1] is the coefficient table; its first row holds the headers.
    summary_df = pd.DataFrame(summary.tables[1])
    summary_df.columns = summary_df.iloc[0]
    summary_df.drop(0, inplace=True)
    summary_df.columns = summary_df.columns.astype(str)
    summary_df.columns = ["Variable"] + summary_df.columns[1:].to_list()
    for i in summary_df.columns[1:]:
        summary_df[i] = summary_df[i].astype(str).astype(float)
    summary_df.Variable = summary_df.Variable.astype(str)
    # Significance stars for the p-values.
    summary_df['Indicator'] = summary_df['P>|t|'].apply(lambda x:"***" if x < 0.001 else "**" if x < 0.01 else "*" if x < 0.05 else "." if x < 0.1  else "")
    summary_df = summary_df.sort_values("Variable").reset_index(drop=True)
    # to_csv() without a path only returns the CSV text; write the file the
    # message below announces.
    summary_df.to_csv("summary_OLS_1.csv", index=False)
    new_line()
    print(colored("NOTE: This summary saved as <summary_OLS_1.csv>", 'red'))

    new_line()
    print(summary_df.to_string())
    # ============================= Model statistic
    predictions = model_reg.predict(test_X)

    new_line()
    print(colored(" --- Model statistic --- \n", 'red'))
    print(f"R-squared         : {round(model_reg.rsquared, 3)}")
    print(f"Adj. R-squared    : {round(model_reg.rsquared_adj, 3)}")
    print(f"F-statistic       : {round(model_reg.fvalue)}")
    print(f"Prob (F-statistic): {model_reg.f_pvalue}")
    print(f"No. Observations  : {round(model_reg.nobs)}")
    print(f"AIC               : {round(model_reg.aic)}")
    print(f"Df Residuals      : {round(model_reg.df_resid)}")
    print(f"BIC               : {round(model_reg.bic)}")
    print(f"RMSE (test)       : {RMSE(predictions)}")
    # ======
    # Residual diagnostics: the predictor most correlated with the training
    # residuals, and the residual mean.
    f = train_X.copy("deep")
    f['Errors__'] = model_reg.resid
    f = f.corr()['Errors__'].drop("Errors__").abs().sort_values().dropna().tail(1)
    new_line()
    print(f"Maximum correlation between Reseduals and any data columns is {f.values[0]}, with columns <{f.index[0]}>")
    print(f"Mean of train reseduals: {model_reg.resid.mean()}")
    del f
    # ============================= END (Model statistic)
    # --------------------------------------------------------- END Linear regression




    # --------------------------------------------------------- Random Forest
    print("\n ------------------------------------- Random Forest -------------------------------------\n")

    rf = RandomForestRegressor(n_estimators = 200, oob_score=True)
    # fit() returns the estimator itself, so model_rf and rf are one object.
    model_rf = rf.fit(train_X, train_y)
    predictions_rf = rf.predict(test_X)

    new_line()
    print(colored("RF model peramters:\n", 'red'))
    pprint.pprint(model_rf.get_params())

    new_line()
    # Plot the (at most) 30 most important features.
    featuresImportance = pd.Series(model_rf.feature_importances_, index=train_X.columns).sort_values(ascending=False)
    if len(featuresImportance) > 30:
        featuresImportance = featuresImportance.head(30)
    featuresImportance.plot(figsize=(20,10), kind='bar', grid=True)
    plt.title("RandomForest Feature importances Graph", size=18,color='red')
    plt.xlabel("Features", size=14, color='red')
    plt.ylabel("Importance", size=14, color='red')
    plt.show()
    del featuresImportance

    new_line()
    print(colored("--- Model statistic ---", 'red'))
    # The coefficient of determination R^2 of the prediction.
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
    print(f"R^2 (test) : {rf.score(test_X, test_y)}")
    print(f"R^2 (train): {rf.score(train_X, train_y)}")
    print(f"RMSE (test): {RMSE(predictions_rf)}")
    print(f"oob score  : {model_rf.oob_score_}")

    # Residual diagnostics on the test set: the predictor most correlated
    # with the prediction errors.
    f = test_X.copy("deep")
    errors_rf = predictions_rf - test_y
    f['Errors__'] = errors_rf
    f = f.corr()['Errors__'].drop("Errors__").abs().sort_values().dropna().tail(1)
    new_line()
    print(f"Maximum correlation between Reseduals and any data columns is {f.values[0]}, with columns <{f.index[0]}>")
    del f
    # --------------------------------------------------------- END Random Forest
elif df[target_variable].dtype == "O":
    # Classififcation problem
    if df[target_variable].nunique() == 2:
        print("\n-------------------- This is Binary Classification problem --------------------\n")
        print("''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")

        # Binary-classification branch: one-hot encode the object-typed predictors,
        # fit a logistic regression, then chart accuracy / precision / recall / F1
        # over a sweep of decision thresholds. clf.classes_[1] is treated as the
        # positive class throughout.

        # Rebuild df as [non-object columns | dummies of object predictors | target].
        # `axis` is given by keyword: newer pandas releases make every concat
        # argument after `objs` keyword-only.
        df = pd.concat(
            [
                df.select_dtypes(exclude="O"),
                pd.get_dummies(df.drop(columns=target_variable).select_dtypes("O")),
                df[[target_variable]],
            ],
            axis=1,
        )

        train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])
        clf = LogisticRegression().fit(train_X, train_y)

        negative_label, positive_label = clf.classes_[0], clf.classes_[1]

        # Column 1 of predict_proba is P(positive_label). Thresholding P(classes_[0])
        # with "< thresh -> classes_[0]" would assign the negative label to exactly
        # the rows least likely to be negative, so the positive-class column is used.
        predictions = pd.Series(clf.predict_proba(test_X)[:, 1])

        # Give test_y a positional index once (loop-invariant) so element-wise
        # comparisons against the freshly built prediction Series line up.
        test_y = test_y.reset_index(drop=True)
        actual_positive = test_y == positive_label

        lst = []
        # Skip the first grid point: at the minimum score nothing is "< thresh",
        # so every row would be predicted positive.
        for thresh in np.linspace(predictions.min(), predictions.max(), 50)[1:]:
            # Scores below the threshold -> negative label, otherwise positive label.
            pred = pd.Series(np.where(predictions < thresh, negative_label, positive_label))
            predicted_positive = pred == positive_label

            TP = (predicted_positive & actual_positive).sum()
            FN = (~predicted_positive & actual_positive).sum()
            FP = (predicted_positive & ~actual_positive).sum()

            # Report 0 instead of NaN when a denominator is empty so the plotted
            # lines have no gaps.
            p = TP / (TP + FP) if (TP + FP) else 0.0
            r = TP / (TP + FN) if (TP + FN) else 0.0
            f = 2 * p * r / (p + r) if (p + r) else 0.0

            lst.append((thresh, (pred == test_y).mean(), p, r, f))

        d = pd.DataFrame(lst, columns=["Thresold", "Accurecy(0-1)", "Precision", "Recall", "F1"])
        d = d.set_index("Thresold")
        d.plot(grid=True, figsize=(18, 7))
        plt.title("Model performance at diffrent Thresolds", size=18, color='red')
        plt.xlabel("Thresold", size=14, color='red')
        plt.ylabel("")
        plt.show()
    else:
        to_print = "\n-------------------- This is Multiclass Classification problem --------------------\n"
        print(colored(to_print, 'red'))
        print("'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")

        # Multiclass branch: label-encode every object column (target included),
        # fit a random forest, plot the top feature importances, print accuracy and
        # macro F1, then draw one-vs-rest ROC curves, the confusion matrix and
        # one-vs-rest precision-recall curves.

        # Encode each object column as the string form of its integer label code.
        # The encoded Series keeps the column's own index; a fresh RangeIndex would
        # be re-aligned on assignment and yield NaN whenever df's index is not 0..n-1.
        object_cols = df.select_dtypes("O").columns
        df[object_cols] = df[object_cols].apply(
            lambda x: pd.Series(LabelEncoder().fit_transform(x.astype(str)), index=x.index).astype(str)
        )
        train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])

        clf = RandomForestClassifier(n_estimators=1000).fit(train_X, train_y)
        predictions = clf.predict(test_X)

        # Show at most the 30 most important features.
        feature_imp = pd.Series(clf.feature_importances_, index=train_X.columns).sort_values(ascending=False)
        feature_imp = feature_imp.head(30)
        feature_imp.plot(kind='barh', figsize=(17, 10), grid=True)
        plt.title("Feature importances Graph", size=18, color='red')
        plt.xlabel("Importance", size=14, color='red')
        plt.ylabel("Feature", size=14, color='red')
        plt.show()
        # ====
        print(f"accuracy_score: {metrics.accuracy_score(test_y, predictions)}")
        # f1_score defaults to average='binary', which raises on a multiclass
        # target; macro-averaging gives every class equal weight.
        print(f"f1_score: {metrics.f1_score(test_y, predictions, average='macro')}")

        # One-vs-rest inputs shared by the ROC and precision-recall plots: for each
        # class, a 0/1 indicator of the true label and the predicted probability of
        # that class. Classes that are absent from (or make up all of) the test
        # split are skipped because their curves are undefined.
        class_scores = clf.predict_proba(test_X)
        one_vs_rest = []
        for col, label in enumerate(clf.classes_):
            is_label = (test_y == label).to_numpy().astype(int)
            if 0 < is_label.sum() < is_label.size:
                one_vs_rest.append((label, is_label, class_scores[:, col]))

        # ROC curves, one per class (the single-call ROC plot helper only accepts
        # binary classifiers).
        plt.figure()
        for label, is_label, scores in one_vs_rest:
            fpr, tpr, _ = metrics.roc_curve(is_label, scores)
            plt.plot(fpr, tpr, label=f"{label} (AUC = {metrics.auc(fpr, tpr):.2f})")
        plt.plot([0, 1], [0, 1], linestyle="--", color="grey")  # chance line
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.legend()
        plt.title("ROC curve plot")
        plt.show()

        # Confusion matrix: the display object draws nothing until .plot() is called.
        cm = metrics.confusion_matrix(test_y, predictions, labels=clf.classes_)
        metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_).plot()
        plt.title("Confusion matrix")
        plt.show()

        # Precision-recall curves, one per class.
        plt.figure()
        for label, is_label, scores in one_vs_rest:
            precision, recall, _ = metrics.precision_recall_curve(is_label, scores)
            plt.plot(recall, precision, label=str(label))
        plt.xlabel("Recall")
        plt.ylabel("Precision")
        plt.legend()
        plt.title("Precision recall curve")
        plt.show()
# ================================================================================================================ END Modeling
-------------------------

The Data have:
	2919 rows
	81 columns


-------------------------

Columns types distribution:

object     43
int64      26
float64    12
dtype: int64

-------------------------

There are 1459 NAs in target values, we droped those rows

-------------------------

There are 19 (out of 81, [23%]) columns that contains 1 or more NA.

-------------------------

19 NA_indicator variables added to the data

========= NA Graphs =========

-------------------------

-------------------------

NA columns data type Distribution:

object     16
float64     3
dtype: int64

-------------------------

NaN Ratio (0-100)

Electrical       0.068493
MasVnrType       0.547945
MasVnrArea       0.547945
BsmtQual         2.534247
BsmtCond         2.534247
BsmtFinType1     2.534247
BsmtExposure     2.602740
BsmtFinType2     2.602740
GarageCond       5.547945
GarageQual       5.547945
GarageFinish     5.547945
GarageType       5.547945
GarageYrBlt      5.547945
LotFrontage     17.739726
FireplaceQu     47.260274
Fence           80.753425
Alley           93.767123
MiscFeature     96.301370
PoolQC          99.520548
dtype: float64

-------------------------

(Before Missing values treatment)
There are 6965 Missing values:
	6617 in catagorical variables
	348 in numerical columns
	0.0 in others

(After filling numeric missing values)
There are 6617 Missing values:
	6617 in catagorical variables
	0 in numerical columns
	0.0 in others

-------------------------

Missing values imputed, Now there are 0 Missing values

-------------------------

There are 1 column that have all unique values, so no value repeatation, we droped it column.
Dropped column name are:

	 Id

-------------------------

Now The Data have:
	1460 rows
	99 columns


-------------------------

Added 18 String Features (Extracted from numerical variables)

-------------------------

The column <Street> have imbalanced, so we droped it, it has 2 unique values, and most commont value frequency ratio is 0.9958904109589041

-------------------------

The column <Utilities> have imbalanced, so we droped it, it has 2 unique values, and most commont value frequency ratio is 0.9993150684931507

-------------------------

The column <Electrical_NA_indicator> have imbalanced, so we droped it, it has 2 unique values, and most commont value frequency ratio is 0.9993150684931507

-------------------------

The column <PoolQC_NA_indicator> have imbalanced, so we droped it, it has 2 unique values, and most commont value frequency ratio is 0.9952054794520548

-------------------------

The column <PoolArea_str> have imbalanced, so we droped it, it has 8 unique values, and most commont value frequency ratio is 0.9952054794520548

-------------------------

<Rare case> catagory:
                         Count   Ratio
HouseStyle                 8.0  0.5479
MasVnrType_NA_indicator    8.0  0.5479
MasVnrArea_NA_indicator    8.0  0.5479
FullBath_str               9.0  0.6164
Foundation                 9.0  0.6164
RoofStyle                  9.0  0.6164
Neighborhood              11.0  0.7534
Heating                   14.0  0.9589
BedroomAbvGr_str          14.0  0.9589
Condition1                15.0  1.0274
Condition2                15.0  1.0274
RoofMatl                  15.0  1.0274
Exterior2nd               17.0  1.1644
3SsnPorch_str             24.0  1.6438
LowQualFinSF_str          26.0  1.7808
SaleType                  28.0  1.9178
MiscVal_str               41.0  2.8082

-------------------------




========================================= MSZoning =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1151   78.8356
Least frequent                10    0.6849
Values occured only once       0    0.0000



========================================= Alley =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent                832   56.9863
Least frequent               628   43.0137
Values occured only once       0    0.0000



========================================= LotShape =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                925   63.3562
Least frequent                10    0.6849
Values occured only once       0    0.0000



========================================= LandContour =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1311   89.7945
Least frequent                36    2.4658
Values occured only once       0    0.0000



========================================= LotConfig =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1056   72.3288
Least frequent                47    3.2192
Values occured only once       0    0.0000



========================================= LandSlope =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1382   94.6575
Least frequent                13    0.8904
Values occured only once       0    0.0000



========================================= Neighborhood =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       24    1.6438
Len                         1460       NaN
NA                             0    0.0000
Most frequent                225   15.4110
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= Condition1 =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        7    0.4795
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1260   86.3014
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= Condition2 =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1445   98.9726
Least frequent                15    1.0274
Values occured only once       0    0.0000



========================================= BldgType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1220   83.5616
Least frequent                31    2.1233
Values occured only once       0    0.0000



========================================= HouseStyle =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        8    0.5479
Len                         1460       NaN
NA                             0    0.0000
Most frequent                726   49.7260
Least frequent                 8    0.5479
Values occured only once       0    0.0000



========================================= RoofStyle =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1141   78.1507
Least frequent                 9    0.6164
Values occured only once       0    0.0000



========================================= RoofMatl =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1434   98.2192
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= Exterior1st =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       10    0.6849
Len                         1460       NaN
NA                             0    0.0000
Most frequent                522   35.7534
Least frequent                20    1.3699
Values occured only once       0    0.0000



========================================= Exterior2nd =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       12    0.8219
Len                         1460       NaN
NA                             0    0.0000
Most frequent                504   34.5205
Least frequent                10    0.6849
Values occured only once       0    0.0000



========================================= MasVnrType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                864   59.1781
Least frequent                16    1.0959
Values occured only once       0    0.0000



========================================= ExterQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                906   62.0548
Least frequent                14    0.9589
Values occured only once       0    0.0000



========================================= ExterCond =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1286   88.0822
Least frequent                28    1.9178
Values occured only once       0    0.0000



========================================= Foundation =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                647   44.3151
Least frequent                 9    0.6164
Values occured only once       0    0.0000



========================================= BsmtQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                663   45.4110
Least frequent                53    3.6301
Values occured only once       0    0.0000



========================================= BsmtCond =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1336   91.5068
Least frequent                58    3.9726
Values occured only once       0    0.0000



========================================= BsmtExposure =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                955   65.4110
Least frequent               116    7.9452
Values occured only once       0    0.0000



========================================= BsmtFinType1 =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent                467   31.9863
Least frequent                74    5.0685
Values occured only once       0    0.0000



========================================= BsmtFinType2 =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1293   88.5616
Least frequent                14    0.9589
Values occured only once       0    0.0000



========================================= Heating =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1428   97.8082
Least frequent                14    0.9589
Values occured only once       0    0.0000



========================================= HeatingQC =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                742   50.8219
Least frequent                49    3.3562
Values occured only once       0    0.0000



========================================= CentralAir =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1365   93.4932
Least frequent                95    6.5068
Values occured only once       0    0.0000



========================================= Electrical =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1339   91.7123
Least frequent                27    1.8493
Values occured only once       0    0.0000



========================================= KitchenQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                735   50.3425
Least frequent                39    2.6712
Values occured only once       0    0.0000



========================================= Functional =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1366   93.5616
Least frequent                14    0.9589
Values occured only once       0    0.0000



========================================= FireplaceQu =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                701   48.0137
Least frequent                34    2.3288
Values occured only once       0    0.0000



========================================= GarageType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                903   61.8493
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= GarageFinish =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                676   46.3014
Least frequent               356   24.3836
Values occured only once       0    0.0000



========================================= GarageQual =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1329   91.0274
Least frequent                15    1.0274
Values occured only once       0    0.0000



========================================= GarageCond =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1366   93.5616
Least frequent                10    0.6849
Values occured only once       0    0.0000



========================================= PavedDrive =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1340   91.7808
Least frequent                30    2.0548
Values occured only once       0    0.0000



========================================= PoolQC =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                865   59.2466
Least frequent               159   10.8904
Values occured only once       0    0.0000



========================================= Fence =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                688   47.1233
Least frequent                39    2.6712
Values occured only once       0    0.0000



========================================= MiscFeature =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1371   93.9041
Least frequent                10    0.6849
Values occured only once       0    0.0000



========================================= SaleType =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1267   86.7808
Least frequent                28    1.9178
Values occured only once       0    0.0000



========================================= SaleCondition =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1202   82.3288
Least frequent                12    0.8219
Values occured only once       0    0.0000



========================================= LotFrontage_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1201   82.2603
Least frequent               259   17.7397
Values occured only once       0    0.0000



========================================= Alley_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1369   93.7671
Least frequent                91    6.2329
Values occured only once       0    0.0000



========================================= MasVnrType_NA_indicator =========================================



-------------------------

This Columns is duplicate of <MasVnrArea_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1452   99.4521
Least frequent                 8    0.5479
Values occured only once       0    0.0000



========================================= MasVnrArea_NA_indicator =========================================



-------------------------

This Columns is duplicate of <MasVnrType_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1452   99.4521
Least frequent                 8    0.5479
Values occured only once       0    0.0000



========================================= BsmtQual_NA_indicator =========================================



-------------------------

This Columns is duplicate of <BsmtCond_NA_indicator> column

-------------------------

This Columns is duplicate of <BsmtFinType1_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1423   97.4658
Least frequent                37    2.5342
Values occured only once       0    0.0000



========================================= BsmtCond_NA_indicator =========================================



-------------------------

This Columns is duplicate of <BsmtQual_NA_indicator> column

-------------------------

This Columns is duplicate of <BsmtFinType1_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1423   97.4658
Least frequent                37    2.5342
Values occured only once       0    0.0000



========================================= BsmtExposure_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1422   97.3973
Least frequent                38    2.6027
Values occured only once       0    0.0000



========================================= BsmtFinType1_NA_indicator =========================================



-------------------------

This Columns is duplicate of <BsmtQual_NA_indicator> column

-------------------------

This Columns is duplicate of <BsmtCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1423   97.4658
Least frequent                37    2.5342
Values occured only once       0    0.0000



========================================= BsmtFinType2_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1422   97.3973
Least frequent                38    2.6027
Values occured only once       0    0.0000



========================================= FireplaceQu_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent                770   52.7397
Least frequent               690   47.2603
Values occured only once       0    0.0000



========================================= GarageType_NA_indicator =========================================



-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= GarageYrBlt_NA_indicator =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= GarageFinish_NA_indicator =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= GarageQual_NA_indicator =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= GarageCond_NA_indicator =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= Fence_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1179   80.7534
Least frequent               281   19.2466
Values occured only once       0    0.0000



========================================= MiscFeature_NA_indicator =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1406   96.3014
Least frequent                54    3.6986
Values occured only once       0    0.0000



========================================= MSSubClass_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       14    0.9589
Len                         1460       NaN
NA                             0    0.0000
Most frequent                540   36.9863
Least frequent                10    0.6849
Values occured only once       0    0.0000



========================================= OverallQual_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        8    0.5479
Len                         1460       NaN
NA                             0    0.0000
Most frequent                402   27.5342
Least frequent                18    1.2329
Values occured only once       0    0.0000



========================================= OverallCond_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        7    0.4795
Len                         1460       NaN
NA                             0    0.0000
Most frequent                827   56.6438
Least frequent                22    1.5068
Values occured only once       0    0.0000



========================================= LowQualFinSF_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1434   98.2192
Least frequent                26    1.7808
Values occured only once       0    0.0000



========================================= BsmtFullBath_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                857   58.6986
Least frequent                15    1.0274
Values occured only once       0    0.0000



========================================= BsmtHalfBath_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1380   94.5205
Least frequent                80    5.4795
Values occured only once       0    0.0000



========================================= FullBath_str =========================================



-------------------------

This Columns is duplicate of <FullBath> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                768   52.6027
Least frequent                 9    0.6164
Values occured only once       0    0.0000



========================================= HalfBath_str =========================================



-------------------------

This Columns is duplicate of <HalfBath> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                913   62.5342
Least frequent                12    0.8219
Values occured only once       0    0.0000



========================================= BedroomAbvGr_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent                804   55.0685
Least frequent                14    0.9589
Values occured only once       0    0.0000



========================================= KitchenAbvGr_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1395   95.5479
Least frequent                65    4.4521
Values occured only once       0    0.0000



========================================= TotRmsAbvGrd_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       10    0.6849
Len                         1460       NaN
NA                             0    0.0000
Most frequent                404   27.6712
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= Fireplaces_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                695   47.6027
Least frequent               115    7.8767
Values occured only once       0    0.0000



========================================= GarageCars_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                829   56.7808
Least frequent                81    5.5479
Values occured only once       0    0.0000



========================================= 3SsnPorch_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1436   98.3562
Least frequent                24    1.6438
Values occured only once       0    0.0000



========================================= MiscVal_str =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1408   96.4384
Least frequent                11    0.7534
Values occured only once       0    0.0000



========================================= MoSold_str =========================================



-------------------------

This Columns is duplicate of <MoSold> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       12    0.8219
Len                         1460       NaN
NA                             0    0.0000
Most frequent                253   17.3288
Least frequent                52    3.5616
Values occured only once       0    0.0000



========================================= YrSold_str =========================================



-------------------------

This Columns is duplicate of <YrSold> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                338   23.1507
Least frequent               175   11.9863
Values occured only once       0    0.0000



========================================= MSSubClass =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.4076567471495591)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

           Count   Ratio
Count     1460.0  100.00
NA           0.0    0.00
Mean        56.9     NaN
Std         42.3     NaN
Min         20.0     NaN
25%         20.0     NaN
50%         50.0     NaN
75%         70.0     NaN
Max        190.0     NaN
Nunique     15.0    1.03
Outlies     30.0    2.05
Nagetive     0.0    0.00
Zeros        0.0    0.00



========================================= LotFrontage =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.0120008521763144)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        70.75     NaN
Std         23.47     NaN
Min         21.00     NaN
25%         60.00     NaN
50%         70.00     NaN
75%         80.00     NaN
Max        313.00     NaN
Nunique    224.00   15.34
Outlies     14.00    0.96
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= LotArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 12.207687851233496)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

              Count   Ratio
Count       1460.00  100.00
NA             0.00    0.00
Mean       10516.83     NaN
Std         9981.26     NaN
Min         1300.00     NaN
25%         7553.50     NaN
50%         9478.50     NaN
75%        11601.50     NaN
Max       215245.00     NaN
Nunique     1073.00   73.49
Outlies       13.00    0.89
Nagetive       0.00    0.00
Zeros          0.00    0.00



========================================= OverallQual =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.2169439277628693)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.10     NaN
Std          1.38     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          7.00     NaN
Max         10.00     NaN
Nunique     10.00    0.68
Outlies      2.00    0.14
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= OverallCond =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6930674724842182)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         5.58     NaN
Std          1.11     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          5.00     NaN
75%          6.00     NaN
Max          9.00     NaN
Nunique      9.00    0.62
Outlies     28.00    1.92
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= YearBuilt =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: -0.613461172488183)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1971.27     NaN
Std         30.20     NaN
Min       1872.00     NaN
25%       1954.00     NaN
50%       1973.00     NaN
75%       2000.00     NaN
Max       2010.00     NaN
Nunique    112.00    7.67
Outlies      6.00    0.41
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= YearRemodAdd =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: -0.5035620027004709)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1984.87     NaN
Std         20.65     NaN
Min       1950.00     NaN
25%       1967.00     NaN
50%       1994.00     NaN
75%       2004.00     NaN
Max       2010.00     NaN
Nunique     61.00    4.18
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= MasVnrArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.6682455485578593)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       103.84     NaN
Std        180.74     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        166.00     NaN
Max       1600.00     NaN
Nunique    335.00   22.95
Outlies     32.00    2.19
Nagetive     0.00    0.00
Zeros      861.00   58.97



========================================= BsmtFinSF1 =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.685503071910789)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       443.64     NaN
Std        456.10     NaN
Min          0.00     NaN
25%          0.00     NaN
50%        383.50     NaN
75%        712.25     NaN
Max       5644.00     NaN
Nunique    637.00   43.63
Outlies      6.00    0.41
Nagetive     0.00    0.00
Zeros      467.00   31.99



========================================= BsmtFinSF2 =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.255261108933303)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        46.55     NaN
Std        161.32     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max       1474.00     NaN
Nunique    144.00    9.86
Outlies     50.00    3.42
Nagetive     0.00    0.00
Zeros     1293.00   88.56



========================================= BsmtUnfSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.9202684528039037)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       567.24     NaN
Std        441.87     NaN
Min          0.00     NaN
25%        223.00     NaN
50%        477.50     NaN
75%        808.00     NaN
Max       2336.00     NaN
Nunique    780.00   53.42
Outlies     11.00    0.75
Nagetive     0.00    0.00
Zeros      118.00    8.08



========================================= TotalBsmtSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.5242545490627664)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1057.43     NaN
Std        438.71     NaN
Min          0.00     NaN
25%        795.75     NaN
50%        991.50     NaN
75%       1298.25     NaN
Max       6110.00     NaN
Nunique    721.00   49.38
Outlies     10.00    0.68
Nagetive     0.00    0.00
Zeros       37.00    2.53



========================================= 1stFlrSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.3767566220336365)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1162.63     NaN
Std        386.59     NaN
Min        334.00     NaN
25%        882.00     NaN
50%       1087.00     NaN
75%       1391.25     NaN
Max       4692.00     NaN
Nunique    753.00   51.58
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= 2ndFlrSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.8130298163023265)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       346.99     NaN
Std        436.53     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        728.00     NaN
Max       2065.00     NaN
Nunique    417.00   28.56
Outlies      4.00    0.27
Nagetive     0.00    0.00
Zeros      829.00   56.78



========================================= LowQualFinSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 9.011341288465387)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         5.84     NaN
Std         48.62     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        572.00     NaN
Nunique     24.00    1.64
Outlies     20.00    1.37
Nagetive     0.00    0.00
Zeros     1434.00   98.22



========================================= GrLivArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.3665603560164552)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1515.46     NaN
Std        525.48     NaN
Min        334.00     NaN
25%       1129.50     NaN
50%       1464.00     NaN
75%       1776.75     NaN
Max       5642.00     NaN
Nunique    861.00   58.97
Outlies     16.00    1.10
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= BsmtFullBath =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.596066609663168)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.43     NaN
Std          0.52     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies     16.00    1.10
Nagetive     0.00    0.00
Zeros      856.00   58.63



========================================= BsmtHalfBath =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.103402697955168)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.06     NaN
Std          0.24     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max          2.00     NaN
Nunique      3.00    0.21
Outlies     82.00    5.62
Nagetive     0.00    0.00
Zeros     1378.00   94.38



========================================= FullBath =========================================



-------------------------

This Columns is duplicate of <FullBath_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.036561558402727165)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.57     NaN
Std          0.55     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          2.00     NaN
75%          2.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        9.00    0.62



========================================= HalfBath =========================================



-------------------------

This Columns is duplicate of <HalfBath_str> column
Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.675897448233722)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.38     NaN
Std          0.50     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          1.00     NaN
Max          2.00     NaN
Nunique      3.00    0.21
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros      913.00   62.53



========================================= BedroomAbvGr =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.21179009627507137)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         2.87     NaN
Std          0.82     NaN
Min          0.00     NaN
25%          2.00     NaN
50%          3.00     NaN
75%          3.00     NaN
Max          8.00     NaN
Nunique      8.00    0.55
Outlies     14.00    0.96
Nagetive     0.00    0.00
Zeros        6.00    0.41



========================================= KitchenAbvGr =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.488396777072859)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.05     NaN
Std          0.22     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          1.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies     68.00    4.66
Nagetive     0.00    0.00
Zeros        1.00    0.07



========================================= TotRmsAbvGrd =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6763408364355531)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.52     NaN
Std          1.63     NaN
Min          2.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          7.00     NaN
Max         14.00     NaN
Nunique     12.00    0.82
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= Fireplaces =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6495651830548841)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.61     NaN
Std          0.64     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          1.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies      5.00    0.34
Nagetive     0.00    0.00
Zeros      690.00   47.26



========================================= GarageYrBlt =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: -0.541264504372725)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1977.23     NaN
Std         24.78     NaN
Min       1900.00     NaN
25%       1960.00     NaN
50%       1978.00     NaN
75%       2001.00     NaN
Max       2010.00     NaN
Nunique    148.00   10.14
Outlies      1.00    0.07
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= GarageCars =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: -0.3425489297486655)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.77     NaN
Std          0.75     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          2.00     NaN
75%          2.00     NaN
Max          4.00     NaN
Nunique      5.00    0.34
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros       81.00    5.55



========================================= GarageArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.17998090674623907)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       472.98     NaN
Std        213.80     NaN
Min          0.00     NaN
25%        334.50     NaN
50%        480.00     NaN
75%        576.00     NaN
Max       1418.00     NaN
Nunique    441.00   30.21
Outlies      7.00    0.48
Nagetive     0.00    0.00
Zeros       81.00    5.55



========================================= WoodDeckSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.5413757571931312)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        94.24     NaN
Std        125.34     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        168.00     NaN
Max        857.00     NaN
Nunique    274.00   18.77
Outlies     22.00    1.51
Nagetive     0.00    0.00
Zeros      761.00   52.12



========================================= OpenPorchSF =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.3643417403694404)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        46.66     NaN
Std         66.26     NaN
Min          0.00     NaN
25%          0.00     NaN
50%         25.00     NaN
75%         68.00     NaN
Max        547.00     NaN
Nunique    202.00   13.84
Outlies     27.00    1.85
Nagetive     0.00    0.00
Zeros      656.00   44.93



========================================= EnclosedPorch =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 3.08987190371177)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        21.95     NaN
Std         61.12     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        552.00     NaN
Nunique    120.00    8.22
Outlies     51.00    3.49
Nagetive     0.00    0.00
Zeros     1252.00   85.75



========================================= 3SsnPorch =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 10.304342032693112)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         3.41     NaN
Std         29.32     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        508.00     NaN
Nunique     20.00    1.37
Outlies     23.00    1.58
Nagetive     0.00    0.00
Zeros     1436.00   98.36



========================================= ScreenPorch =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.122213743143115)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        15.06     NaN
Std         55.76     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        480.00     NaN
Nunique     76.00    5.21
Outlies     55.00    3.77
Nagetive     0.00    0.00
Zeros     1344.00   92.05



========================================= PoolArea =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 14.828373640750588)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         2.76     NaN
Std         40.18     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        738.00     NaN
Nunique      8.00    0.55
Outlies      7.00    0.48
Nagetive     0.00    0.00
Zeros     1453.00   99.52



========================================= MiscVal =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 24.476794188821916)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

             Count   Ratio
Count      1460.00  100.00
NA            0.00    0.00
Mean         43.49     NaN
Std         496.12     NaN
Min           0.00     NaN
25%           0.00     NaN
50%           0.00     NaN
75%           0.00     NaN
Max       15500.00     NaN
Nunique      21.00    1.44
Outlies       8.00    0.55
Nagetive      0.00    0.00
Zeros      1408.00   96.44



========================================= MoSold =========================================



-------------------------

This Columns is duplicate of <MoSold_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.21205298505146022)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.32     NaN
Std          2.70     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          8.00     NaN
Max         12.00     NaN
Nunique     12.00    0.82
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= YrSold =========================================



-------------------------

This Columns is duplicate of <YrSold_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.09626851386568028)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      2007.82     NaN
Std          1.33     NaN
Min       2006.00     NaN
25%       2007.00     NaN
50%       2008.00     NaN
75%       2009.00     NaN
Max       2010.00     NaN
Nunique      5.00    0.34
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00



========================================= SalePrice =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.8828757597682129)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

             Count   Ratio
Count       1460.0  100.00
NA             0.0    0.00
Mean      180921.2     NaN
Std        79442.5     NaN
Min        34900.0     NaN
25%       129975.0     NaN
50%       163000.0     NaN
75%       214000.0     NaN
Max       755000.0     NaN
Nunique      663.0   45.41
Outlies       22.0    1.51
Nagetive       0.0    0.00
Zeros          0.0    0.00


----------------------------------------------------------------------------------------------
****************************************** Modeling ******************************************

-------------------- This is Regression problem --------------------

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

 ------------------------------------- Linear Regression -------------------------------------


-------------------------

NOTE: This summary saved as <summary_OLS_1.csv>

-------------------------

                                   Variable        coef     std err      t  P>|t|      [0.025      0.975] Indicator
0                                  1stFlrSF     -7.8163      12.613 -0.620  0.536     -32.573      16.941          
1                                  2ndFlrSF     -2.3219      12.210 -0.190  0.849     -26.288      21.644          
2                                 3SsnPorch     23.3036      71.452  0.326  0.744    -116.945     163.552          
3                      3SsnPorch_str__"0.0"  -4718.8306    7531.629 -0.627  0.531  -19500.000   10100.000          
4                 3SsnPorch_str__Rare cases   4719.2140    7531.601  0.627  0.531  -10100.000   19500.000          
5               Alley_NA_indicator__Missing   1881.7931    2333.896  0.806  0.420   -2699.272    6462.858          
6           Alley_NA_indicator__Not missing  -1881.4097    2333.833 -0.806  0.420   -6462.351    2699.531          
7                               Alley__Grvl  -1980.6575    1611.039 -1.229  0.219   -5142.870    1181.555          
8                               Alley__Pave   1981.0409    1611.038  1.230  0.219   -1181.168    5143.250          
9                              BedroomAbvGr  -2580.5902    4887.255 -0.528  0.598  -12200.000    7012.310          
10                  BedroomAbvGr_str__"1.0"   3915.4579   11500.000  0.339  0.735  -18700.000   26600.000          
11                  BedroomAbvGr_str__"2.0"  -1833.5954    6657.038 -0.275  0.783  -14900.000   11200.000          
12                  BedroomAbvGr_str__"3.0"  -1220.4947    3655.794 -0.334  0.739   -8396.235    5955.245          
13                  BedroomAbvGr_str__"4.0"   8819.3901    5289.337  1.667  0.096   -1562.733   19200.000         .
14                  BedroomAbvGr_str__"5.0" -16250.0000   11200.000 -1.455  0.146  -38200.000    5678.966          
15             BedroomAbvGr_str__Rare cases   6573.8779   11600.000  0.569  0.570  -16100.000   29300.000          
16                           BldgType__1Fam   -657.1251   20200.000 -0.032  0.974  -40400.000   39000.000          
17                         BldgType__2fmCon  -9951.2253   51800.000 -0.192  0.848 -112000.000   91800.000          
18                         BldgType__Duplex   1505.5745    7519.922  0.200  0.841  -13300.000   16300.000          
19                          BldgType__Twnhs   3111.9733   19300.000  0.162  0.872  -34700.000   40900.000          
20                         BldgType__TwnhsE   5991.1861   18700.000  0.321  0.748  -30600.000   42600.000          
21           BsmtCond_NA_indicator__Missing  -8291.8782    7222.467 -1.148  0.251  -22500.000    5884.670          
22       BsmtCond_NA_indicator__Not missing   8292.2616    7222.380  1.148  0.251   -5884.115   22500.000          
23                             BsmtCond__Fa  -3676.5892    3877.294 -0.948  0.343  -11300.000    3933.919          
24                             BsmtCond__Gd  -1442.9844    3474.349 -0.415  0.678   -8262.577    5376.608          
25                             BsmtCond__TA   5119.9570    2415.074  2.120  0.034     379.554    9860.360         *
26       BsmtExposure_NA_indicator__Missing  -1671.5152   13300.000 -0.126  0.900  -27800.000   24500.000          
27   BsmtExposure_NA_indicator__Not missing   1671.8987   13300.000  0.126  0.900  -24500.000   27800.000          
28                         BsmtExposure__Av  -5300.5227    2173.416 -2.439  0.015   -9566.592   -1034.454         *
29                         BsmtExposure__Gd  14300.0000    3081.315  4.642  0.000    8256.641   20400.000       ***
30                         BsmtExposure__Mn   -849.4240    2617.012 -0.325  0.746   -5986.200    4287.352          
31                         BsmtExposure__No  -8154.4394    1850.988 -4.405  0.000  -11800.000   -4521.246       ***
32                               BsmtFinSF1     -9.1833       5.109 -1.797  0.073     -19.211       0.845         .
33                               BsmtFinSF2     19.6292      12.913  1.520  0.129      -5.716      44.975          
34       BsmtFinType1_NA_indicator__Missing  -8291.8782    7222.467 -1.148  0.251  -22500.000    5884.670          
35   BsmtFinType1_NA_indicator__Not missing   8292.2616    7222.380  1.148  0.251   -5884.115   22500.000          
36                        BsmtFinType1__ALQ   1955.8832    2450.428  0.798  0.425   -2853.914    6765.681          
37                        BsmtFinType1__BLQ     93.8689    2734.221  0.034  0.973   -5272.969    5460.707          
38                        BsmtFinType1__GLQ   5177.3103    2635.278  1.965  0.050       4.681   10300.000         .
39                        BsmtFinType1__LwQ  -3824.2204    3889.441 -0.983  0.326  -11500.000    3810.130          
40                        BsmtFinType1__Rec   1777.4885    2874.065  0.618  0.536   -3863.841    7418.818          
41                        BsmtFinType1__Unf  -5179.9471    2693.871 -1.923  0.055  -10500.000     107.691         .
42       BsmtFinType2_NA_indicator__Missing  16890.0000   14800.000  1.142  0.254  -12100.000   45900.000          
43   BsmtFinType2_NA_indicator__Not missing -16890.0000   14800.000 -1.142  0.254  -45900.000   12100.000          
44                        BsmtFinType2__ALQ   4078.9312    7352.669  0.555  0.579  -10400.000   18500.000          
45                        BsmtFinType2__BLQ  -5019.9093    5985.909 -0.839  0.402  -16800.000    6729.474          
46                        BsmtFinType2__GLQ  -3235.8921    9021.926 -0.359  0.720  -20900.000   14500.000          
47                        BsmtFinType2__LwQ  -3805.2154    5013.541 -0.759  0.448  -13600.000    6035.563          
48                        BsmtFinType2__Rec   1355.6873    4944.827  0.274  0.784   -8350.217   11100.000          
49                        BsmtFinType2__Unf   6626.7817    5153.801  1.286  0.199   -3489.306   16700.000          
50                             BsmtFullBath  -2399.8645   11500.000 -0.209  0.834  -24900.000   20100.000          
51                  BsmtFullBath_str__"0.0"   -455.1750   12500.000 -0.036  0.971  -25000.000   24100.000          
52                  BsmtFullBath_str__"1.0"   6997.9531    5080.298  1.377  0.169   -2973.859   17000.000          
53                  BsmtFullBath_str__"2.0"  -6542.3947   15200.000 -0.429  0.668  -36400.000   23400.000          
54                             BsmtHalfBath  -3826.3929   19800.000 -0.193  0.847  -42800.000   35100.000          
55                  BsmtHalfBath_str__"0.0"  -4964.3481   10100.000 -0.491  0.623  -24800.000   14900.000          
56                  BsmtHalfBath_str__"1.0"   4964.7315   10100.000  0.491  0.623  -14900.000   24800.000          
57           BsmtQual_NA_indicator__Missing  -8291.8782    7222.467 -1.148  0.251  -22500.000    5884.670          
58       BsmtQual_NA_indicator__Not missing   8292.2616    7222.380  1.148  0.251   -5884.115   22500.000          
59                             BsmtQual__Ex  10970.0000    4052.081  2.708  0.007    3017.628   18900.000        **
60                             BsmtQual__Fa  -3390.8948    4623.048 -0.733  0.463  -12500.000    5683.409          
61                             BsmtQual__Gd  -1835.0804    2557.445 -0.718  0.473   -6854.935    3184.775          
62                             BsmtQual__TA  -5744.8567    2565.290 -2.239  0.025  -10800.000    -709.602         *
63                                BsmtUnfSF     -7.9243       5.422 -1.462  0.144     -18.566       2.718          
64                            CentralAir__N   1363.9873    2766.086  0.493  0.622   -4065.397    6793.371          
65                            CentralAir__Y  -1363.6039    2765.951 -0.493  0.622   -6792.724    4065.516          
66                       Condition1__Artery    501.0982    5408.759  0.093  0.926  -10100.000   11100.000          
67                        Condition1__Feedr  -1991.9091    4393.967 -0.453  0.650  -10600.000    6632.746          
68                         Condition1__Norm  10210.0000    2900.867  3.520  0.000    4518.318   15900.000       ***
69                         Condition1__PosN -11100.0000    7577.986 -1.464  0.143  -26000.000    3777.323          
70                         Condition1__RRAe -18080.0000    8623.946 -2.097  0.036  -35000.000   -1157.538         *
71                         Condition1__RRAn  10990.0000    6214.327  1.769  0.077   -1204.244   23200.000         .
72                   Condition1__Rare cases   9467.4701    8345.441  1.134  0.257   -6913.296   25800.000          
73                         Condition2__Norm  15100.0000    5834.151  2.589  0.010    3651.727   26600.000         *
74                   Condition2__Rare cases -15100.0000    5834.273 -2.589  0.010  -26600.000   -3651.105         *
75                        Electrical__FuseA   1414.5526    3498.353  0.404  0.686   -5452.154    8281.259          
76                        Electrical__FuseF   -209.0680    5185.263 -0.040  0.968  -10400.000    9968.775          
77                        Electrical__SBrkr  -1205.1012    3226.098 -0.374  0.709   -7537.416    5127.214          
78                            EnclosedPorch     11.4234      17.562  0.650  0.516     -23.047      45.894          
79                            ExterCond__Fa   1309.2522    5044.204  0.260  0.795   -8591.713   11200.000          
80                            ExterCond__Gd   -616.5853    3258.776 -0.189  0.850   -7013.042    5779.871          
81                            ExterCond__TA   -692.2834    2715.557 -0.255  0.799   -6022.487    4637.920          
82                            ExterQual__Ex   3422.9907    6472.389  0.529  0.597   -9281.274   16100.000          
83                            ExterQual__Fa   -435.9133    9267.526 -0.047  0.962  -18600.000   17800.000          
84                            ExterQual__Gd   -639.7592    4011.968 -0.159  0.873   -8514.612    7235.093          
85                            ExterQual__TA  -2346.9347    3962.920 -0.592  0.554  -10100.000    5431.645          
86                     Exterior1st__AsbShng   -732.2229   12600.000 -0.058  0.954  -25500.000   24000.000          
87                     Exterior1st__BrkFace   8016.9312    7249.128  1.106  0.269   -6211.948   22200.000          
88                     Exterior1st__CemntBd  29630.0000   20900.000  1.416  0.157  -11400.000   70700.000          
89                     Exterior1st__HdBoard  -1485.4158    6579.234 -0.226  0.821  -14400.000   11400.000          
90                     Exterior1st__MetalSd  -5066.7012   12800.000 -0.397  0.692  -30100.000   20000.000          
91                     Exterior1st__Plywood   1509.9516    6500.300  0.232  0.816  -11200.000   14300.000          
92                      Exterior1st__Stucco -16550.0000   13000.000 -1.274  0.203  -42100.000    8950.587          
93                     Exterior1st__VinylSd   1102.5268    8812.046  0.125  0.900  -16200.000   18400.000          
94                     Exterior1st__Wd Sdng -10890.0000    5383.766 -2.022  0.043  -21500.000    -320.430         *
95                     Exterior1st__WdShing  -5537.2482    8052.948 -0.688  0.492  -21300.000   10300.000          
96                     Exterior2nd__AsbShng   -482.0447   12700.000 -0.038  0.970  -25400.000   24500.000          
97                     Exterior2nd__BrkFace   3891.3470    8948.807  0.435  0.664  -13700.000   21500.000          
98                     Exterior2nd__CmentBd -26980.0000   21700.000 -1.244  0.214  -69600.000   15600.000          
99                     Exterior2nd__HdBoard   2680.0019    6491.192  0.413  0.680  -10100.000   15400.000          
100                    Exterior2nd__ImStucc   2302.0800   13600.000  0.169  0.865  -24400.000   29000.000          
101                    Exterior2nd__MetalSd   5965.1701   13200.000  0.453  0.651  -19900.000   31800.000          
102                    Exterior2nd__Plywood   -791.6577    5758.935 -0.137  0.891  -12100.000   10500.000          
103                 Exterior2nd__Rare cases  -9567.5077   10100.000 -0.950  0.342  -29300.000   10200.000          
104                     Exterior2nd__Stucco   8630.7279   13400.000  0.643  0.521  -17700.000   35000.000          
105                    Exterior2nd__VinylSd   2886.0342    8742.957  0.330  0.741  -14300.000   20000.000          
106                    Exterior2nd__Wd Sdng  14370.0000    5340.237  2.691  0.007    3887.767   24900.000        **
107                    Exterior2nd__Wd Shng  -2902.1224    6624.503 -0.438  0.661  -15900.000   10100.000          
108             Fence_NA_indicator__Missing   -499.4858    1289.104 -0.387  0.699   -3029.792    2030.820          
109         Fence_NA_indicator__Not missing    499.8693    1289.178  0.388  0.698   -2030.582    3030.320          
110                            Fence__GdPrv  -2566.9148    2328.740 -1.102  0.271   -7137.860    2004.030          
111                             Fence__GdWo   2378.2791    2266.285  1.049  0.294   -2070.076    6826.635          
112                            Fence__MnPrv   -479.1651    1995.239 -0.240  0.810   -4395.500    3437.170          
113                             Fence__MnWw    668.1842    4528.590  0.148  0.883   -8220.714    9557.083          
114       FireplaceQu_NA_indicator__Missing   -783.3880     474.874 -1.650  0.099   -1715.489     148.713         .
115   FireplaceQu_NA_indicator__Not missing    783.7714     474.657  1.651  0.099    -147.904    1715.447         .
116                         FireplaceQu__Ex   2686.9854    4582.901  0.586  0.558   -6308.517   11700.000          
117                         FireplaceQu__Fa  -2358.3501    3089.400 -0.763  0.445   -8422.349    3705.648          
118                         FireplaceQu__Gd  -1545.6113    1986.968 -0.778  0.437   -5445.711    2354.489          
119                         FireplaceQu__Po   1266.0200    3601.625  0.352  0.725   -5803.394    8335.434          
120                         FireplaceQu__TA    -48.6606    2042.791 -0.024  0.981   -4058.333    3961.012          
121                              Fireplaces -19080.0000    5204.562 -3.667  0.000  -29300.000   -8868.198       ***
122                   Fireplaces_str__"0.0" -24120.0000    5217.722 -4.622  0.000  -34400.000  -13900.000       ***
123                   Fireplaces_str__"1.0"  -2681.0305    1449.449 -1.850  0.065   -5526.066     164.005         .
124                   Fireplaces_str__"2.0"  26800.0000    5579.261  4.803  0.000   15800.000   37700.000       ***
125                      Foundation__BrkTil  -5561.9282    4579.202 -1.215  0.225  -14600.000    3426.313          
126                      Foundation__CBlock    339.8427    4024.215  0.084  0.933   -7559.047    8238.733          
127                       Foundation__PConc   2334.4800    4276.886  0.546  0.585   -6060.364   10700.000          
128                  Foundation__Rare cases   4020.6915   11000.000  0.364  0.716  -17700.000   25700.000          
129                        Foundation__Slab  -1132.7025    9713.851 -0.117  0.907  -20200.000   17900.000          
130                                FullBath  13410.0000    6671.404  2.010  0.045     313.905   26500.000         *
131                     FullBath_str__"1.0"  -7851.7460    9063.448 -0.866  0.387  -25600.000    9938.354          
132                     FullBath_str__"2.0" -14840.0000    4550.693 -3.261  0.001  -23800.000   -5906.026        **
133                     FullBath_str__"3.0"  16980.0000    4299.205  3.949  0.000    8540.403   25400.000       ***
134                FullBath_str__Rare cases   5711.3818    9129.920  0.626  0.532  -12200.000   23600.000          
135                        Functional__Maj1 -10650.0000    8538.039 -1.247  0.213  -27400.000    6109.843          
136                        Functional__Min1   3364.9980    5772.905  0.583  0.560   -7966.291   14700.000          
137                        Functional__Min2  -2164.6055    5768.628 -0.375  0.708  -13500.000    9158.290          
138                         Functional__Mod  -1609.9158    8617.959 -0.187  0.852  -18500.000   15300.000          
139                         Functional__Typ  11060.0000    3799.584  2.911  0.004    3600.893   18500.000        **
140                              GarageArea    -10.3409      10.614 -0.974  0.330     -31.175      10.493          
141                              GarageCars  11020.0000    9096.434  1.212  0.226   -6832.742   28900.000          
142                   GarageCars_str__"0.0"    727.5351    1661.179  0.438  0.662   -2533.094    3988.164          
143                   GarageCars_str__"1.0"  -1816.1112    8809.166 -0.206  0.837  -19100.000   15500.000          
144                   GarageCars_str__"2.0"  -5723.7661    1790.085 -3.197  0.001   -9237.416   -2210.116        **
145                   GarageCars_str__"3.0"   6812.7256    9890.979  0.689  0.491  -12600.000   26200.000          
146        GarageCond_NA_indicator__Missing    727.5351    1661.179  0.438  0.662   -2533.094    3988.164          
147    GarageCond_NA_indicator__Not missing   -727.1516    1661.193 -0.438  0.662   -3987.808    2533.504          
148                          GarageCond__Fa   4076.2462    5771.281  0.706  0.480   -7251.856   15400.000          
149                          GarageCond__Gd   -743.9386    8644.106 -0.086  0.931  -17700.000   16200.000          
150                          GarageCond__Po  -8563.6419   11100.000 -0.769  0.442  -30400.000   13300.000          
151                          GarageCond__TA   5231.7177    5071.013  1.032  0.303   -4721.871   15200.000          
152      GarageFinish_NA_indicator__Missing    727.5351    1661.179  0.438  0.662   -2533.094    3988.164          
153  GarageFinish_NA_indicator__Not missing   -727.1516    1661.193 -0.438  0.662   -3987.808    2533.504          
154                       GarageFinish__Fin   2417.1179    1754.427  1.378  0.169   -1026.542    5860.778          
155                       GarageFinish__RFn  -1125.7719    1517.488 -0.742  0.458   -4104.359    1852.815          
156                       GarageFinish__Unf  -1290.9626    1839.847 -0.702  0.483   -4902.288    2320.363          
157        GarageQual_NA_indicator__Missing    727.5351    1661.179  0.438  0.662   -2533.094    3988.164          
158    GarageQual_NA_indicator__Not missing   -727.1516    1661.193 -0.438  0.662   -3987.808    2533.504          
159                          GarageQual__Fa -10120.0000    5087.144 -1.990  0.047  -20100.000    -139.028         *
160                          GarageQual__Gd  12060.0000    7099.162  1.699  0.090   -1870.575   26000.000         .
161                          GarageQual__TA  -1939.2834    3991.229 -0.486  0.627   -9773.428    5894.862          
162        GarageType_NA_indicator__Missing    727.5351    1661.179  0.438  0.662   -2533.094    3988.164          
163    GarageType_NA_indicator__Not missing   -727.1516    1661.193 -0.438  0.662   -3987.808    2533.504          
164                      GarageType__Attchd  -4318.3019    3345.217 -1.291  0.197  -10900.000    2247.825          
165                     GarageType__Basment   7462.5291    7919.330  0.942  0.346   -8081.849   23000.000          
166                     GarageType__BuiltIn  -9733.4806    4987.970 -1.951  0.051  -19500.000      57.108         .
167                     GarageType__CarPort   6939.3664    8825.394  0.786  0.432  -10400.000   24300.000          
168                      GarageType__Detchd   -349.7296    3505.461 -0.100  0.921   -7230.389    6530.930          
169                             GarageYrBlt    -19.5433      83.201 -0.235  0.814    -182.853     143.766          
170       GarageYrBlt_NA_indicator__Missing    727.5351    1661.179  0.438  0.662   -2533.094    3988.164          
171   GarageYrBlt_NA_indicator__Not missing   -727.1516    1661.193 -0.438  0.662   -3987.808    2533.504          
172                               GrLivArea     52.1632      12.109  4.308  0.000      28.394      75.932       ***
173                                HalfBath  -2935.4332    4681.450 -0.627  0.531  -12100.000    6253.505          
174                     HalfBath_str__"0.0"  -1614.7854    1025.519 -1.575  0.116   -3627.715     398.144          
175                     HalfBath_str__"1.0"   6165.7708    4731.229  1.303  0.193   -3120.876   15500.000          
176                     HalfBath_str__"2.0"  -4550.6020    4593.264 -0.991  0.322  -13600.000    4465.242          
177                           HeatingQC__Ex   1229.8420    2308.660  0.533  0.594   -3301.689    5761.372          
178                           HeatingQC__Fa   2064.9344    4576.387  0.451  0.652   -6917.781   11000.000          
179                           HeatingQC__Gd  -2555.5386    2320.533 -1.101  0.271   -7110.375    1999.298          
180                           HeatingQC__TA   -738.8543    2144.572 -0.345  0.731   -4948.306    3470.598          
181                           Heating__GasA   2346.0492    6025.947  0.389  0.697   -9481.922   14200.000          
182                           Heating__GasW   3507.0692    8154.142  0.430  0.667  -12500.000   19500.000          
183                     Heating__Rare cases  -5852.7349    9018.722 -0.649  0.517  -23600.000   11800.000          
184                      HouseStyle__1.5Fin  -8037.6487   11200.000 -0.715  0.475  -30100.000   14000.000          
185                      HouseStyle__1.5Unf  21640.0000   31000.000  0.698  0.485  -39200.000   82500.000          
186                      HouseStyle__1Story     90.0232    9919.235  0.009  0.993  -19400.000   19600.000          
187                      HouseStyle__2.5Unf -15140.0000   20000.000 -0.756  0.450  -54500.000   24200.000          
188                      HouseStyle__2Story  -7982.6897    9562.751 -0.835  0.404  -26800.000   10800.000          
189                  HouseStyle__Rare cases  -2978.5384   21500.000 -0.138  0.890  -45200.000   39300.000          
190                      HouseStyle__SFoyer   6426.7421   14300.000  0.449  0.653  -21700.000   34500.000          
191                        HouseStyle__SLvl   5982.3074   16400.000  0.366  0.715  -26100.000   38100.000          
192                            KitchenAbvGr   1312.1635   15200.000  0.087  0.931  -28400.000   31100.000          
193                 KitchenAbvGr_str__"1.0"   7027.1457    7991.732  0.879  0.379   -8659.346   22700.000          
194                 KitchenAbvGr_str__"2.0"  -7026.7622    7991.677 -0.879  0.380  -22700.000    8659.622          
195                         KitchenQual__Ex   8612.0761    4329.703  1.989  0.047     113.562   17100.000         *
196                         KitchenQual__Fa   2526.7026    5361.600  0.471  0.638   -7997.261   13100.000          
197                         KitchenQual__Gd  -4334.4466    2642.029 -1.641  0.101   -9520.327     851.434          
198                         KitchenQual__TA  -6803.9488    2567.172 -2.650  0.008  -11800.000   -1765.001        **
199                        LandContour__Bnk -15880.0000    4452.872 -3.566  0.000  -24600.000   -7139.337       ***
200                        LandContour__HLS   4696.1417    4575.812  1.026  0.305   -4285.445   13700.000          
201                        LandContour__Low   5738.3916    5791.022  0.991  0.322   -5628.458   17100.000          
202                        LandContour__Lvl   5445.4619    3128.901  1.740  0.082    -696.071   11600.000         .
203                          LandSlope__Gtl   2328.8313    6293.011  0.370  0.711  -10000.000   14700.000          
204                          LandSlope__Mod   7482.4480    6248.464  1.197  0.231   -4782.288   19700.000          
205                          LandSlope__Sev  -9810.8959   11200.000 -0.878  0.380  -31700.000   12100.000          
206                                 LotArea      0.6095       0.162  3.762  0.000       0.291       0.928       ***
207                       LotConfig__Corner   1759.6378    2383.252  0.738  0.461   -2918.304    6437.580          
208                      LotConfig__CulDSac  10040.0000    3233.673  3.104  0.002    3691.383   16400.000        **
209                          LotConfig__FR2 -12470.0000    3986.806 -3.128  0.002  -20300.000   -4647.173        **
210                       LotConfig__Inside    674.8151    1951.121  0.346  0.730   -3154.923    4504.554          
211                             LotFrontage   -138.7414      66.595 -2.083  0.038    -269.456      -8.026         *
212       LotFrontage_NA_indicator__Missing   -689.2890    1334.840 -0.516  0.606   -3309.367    1930.789          
213   LotFrontage_NA_indicator__Not missing    689.6725    1334.882  0.517  0.606   -1930.487    3309.831          
214                           LotShape__IR1   -951.5591    3460.914 -0.275  0.783   -7744.780    5841.662          
215                           LotShape__IR2   4690.0655    5215.157  0.899  0.369   -5546.454   14900.000          
216                           LotShape__IR3  -5727.1099    8509.849 -0.673  0.501  -22400.000   11000.000          
217                           LotShape__Reg   1988.9869    3481.516  0.571  0.568   -4844.672    8822.646          
218                            LowQualFinSF     62.3015      33.508  1.859  0.063      -3.468     128.071         .
219                 LowQualFinSF_str__"0.0"   8232.4951    7632.733  1.079  0.281   -6749.339   23200.000          
220            LowQualFinSF_str__Rare cases  -8232.1117    7632.649 -1.079  0.281  -23200.000    6749.558          
221                              MSSubClass    209.0630    1128.542  0.185  0.853   -2006.085    2424.211          
222                 MSSubClass_str__"120.0" -19650.0000   43600.000 -0.451  0.652 -105000.000   65900.000          
223                 MSSubClass_str__"160.0" -41810.0000   87400.000 -0.478  0.632 -213000.000  130000.000          
224                 MSSubClass_str__"180.0" -53310.0000  110000.000 -0.484  0.628 -269000.000  163000.000          
225                 MSSubClass_str__"190.0"  -9951.2253   51800.000 -0.192  0.848 -112000.000   91800.000          
226                  MSSubClass_str__"20.0"  32800.0000   73300.000  0.447  0.655 -111000.000  177000.000          
227                  MSSubClass_str__"30.0"  24350.0000   63600.000  0.383  0.702 -100000.000  149000.000          
228                  MSSubClass_str__"45.0"   5800.6073   56100.000  0.103  0.918 -104000.000  116000.000          
229                  MSSubClass_str__"50.0"  23670.0000   41300.000  0.573  0.567  -57400.000  105000.000          
230                  MSSubClass_str__"60.0"   8988.4997   29100.000  0.309  0.758  -48200.000   66100.000          
231                  MSSubClass_str__"70.0"  12490.0000   20400.000  0.612  0.541  -27500.000   52500.000          
232                  MSSubClass_str__"75.0"  11300.0000   25100.000  0.450  0.652  -37900.000   60600.000          
233                  MSSubClass_str__"80.0"   7779.0699   17800.000  0.438  0.661  -27100.000   42600.000          
234                  MSSubClass_str__"85.0"  -3965.4521   15700.000 -0.253  0.801  -34800.000   26800.000          
235                  MSSubClass_str__"90.0"   1505.5745    7519.922  0.200  0.841  -13300.000   16300.000          
236                       MSZoning__C (all) -23410.0000   11800.000 -1.980  0.048  -46600.000    -201.719         *
237                            MSZoning__FV  16100.0000    8190.902  1.965  0.050      20.411   32200.000         .
238                            MSZoning__RH   6164.7116    8822.004  0.699  0.485  -11200.000   23500.000          
239                            MSZoning__RL   1938.3297    4593.380  0.422  0.673   -7077.741   11000.000          
240                            MSZoning__RM   -791.2940    5033.480 -0.157  0.875  -10700.000    9088.623          
241                              MasVnrArea     18.7504       8.157  2.299  0.022       2.740      34.760         *
242    MasVnrArea_NA_indicator__Not missing   2507.7872    2941.608  0.853  0.394   -3266.120    8281.694          
243     MasVnrArea_NA_indicator__Rare cases  -2507.4037    2941.685 -0.852  0.394   -8281.462    3266.654          
244    MasVnrType_NA_indicator__Not missing   2507.7872    2941.608  0.853  0.394   -3266.120    8281.694          
245     MasVnrType_NA_indicator__Rare cases  -2507.4037    2941.685 -0.852  0.394   -8281.462    3266.654          
246                      MasVnrType__BrkCmn -10310.0000    6544.107 -1.575  0.116  -23200.000    2540.033          
247                     MasVnrType__BrkFace    284.8077    2723.092  0.105  0.917   -5060.186    5629.802          
248                        MasVnrType__None   3521.8631    2964.651  1.188  0.235   -2297.273    9340.999          
249                       MasVnrType__Stone   6498.7159    3525.338  1.843  0.066    -420.960   13400.000         .
250       MiscFeature_NA_indicator__Missing  -6605.3742   13900.000 -0.475  0.635  -33900.000   20700.000          
251   MiscFeature_NA_indicator__Not missing   6605.7576   13900.000  0.475  0.635  -20700.000   33900.000          
252                       MiscFeature__Othr  -4070.7299   11300.000 -0.361  0.718  -26200.000   18000.000          
253                       MiscFeature__Shed   7821.9288    6318.024  1.238  0.216   -4579.342   20200.000          
254                       MiscFeature__TenC  -3750.8155    8016.606 -0.468  0.640  -19500.000   12000.000          
255                                 MiscVal      0.8021       1.881  0.426  0.670      -2.891       4.495          
256                      MiscVal_str__"0.0"   9502.5133   18900.000  0.503  0.615  -27600.000   46600.000          
257                    MiscVal_str__"400.0"  -6497.3462   11600.000 -0.562  0.574  -29200.000   16200.000          
258                 MiscVal_str__Rare cases  -3004.7837   10600.000 -0.283  0.777  -23900.000   17900.000          
259                                  MoSold   -593.6744     365.863 -1.623  0.105   -1311.804     124.455          
260                       MoSold_str__"1.0"   4052.9206    3710.622  1.092  0.275   -3230.438   11300.000          
261                      MoSold_str__"10.0"  -6586.0072    3430.105 -1.920  0.055  -13300.000     146.741         .
262                      MoSold_str__"11.0"   3261.9881    3481.555  0.937  0.349   -3571.748   10100.000          
263                      MoSold_str__"12.0"    715.0770    3797.457  0.188  0.851   -6738.725    8168.879          
264                       MoSold_str__"2.0"  -5780.0210    3780.408 -1.529  0.127  -13200.000    1640.315          
265                       MoSold_str__"3.0"  -1027.2701    3180.705 -0.323  0.747   -7270.485    5215.945          
266                       MoSold_str__"4.0"   1219.9308    2875.211  0.424  0.671   -4423.648    6863.510          
267                       MoSold_str__"5.0"    425.0874    2399.001  0.177  0.859   -4283.769    5133.943          
268                       MoSold_str__"6.0"    688.8702    2196.517  0.314  0.754   -3622.542    5000.282          
269                       MoSold_str__"7.0"   4668.7214    2329.075  2.005  0.045      97.119    9240.324         *
270                       MoSold_str__"8.0"  -2323.3292    2879.895 -0.807  0.420   -7976.103    3329.445          
271                       MoSold_str__"9.0"    684.4155    3906.799  0.175  0.861   -6984.006    8352.837          
272                   Neighborhood__Blmngtn   8898.3208   10200.000  0.870  0.384  -11200.000   29000.000          
273                    Neighborhood__BrDale    851.7081   11600.000  0.073  0.942  -21900.000   23600.000          
274                   Neighborhood__BrkSide  -5571.3563    6492.253 -0.858  0.391  -18300.000    7171.898          
275                   Neighborhood__ClearCr  -5608.8279    8064.181 -0.696  0.487  -21400.000   10200.000          
276                   Neighborhood__CollgCr  -2391.8826    3931.458 -0.608  0.543  -10100.000    5324.941          
277                   Neighborhood__Crawfor  16160.0000    6034.185  2.678  0.008    4316.191   28000.000        **
278                   Neighborhood__Edwards -22720.0000    4317.363 -5.262  0.000  -31200.000  -14200.000       ***
279                   Neighborhood__Gilbert  -5695.3204    5300.230 -1.075  0.283  -16100.000    4708.185          
280                    Neighborhood__IDOTRR -12570.0000    8575.214 -1.466  0.143  -29400.000    4257.291          
281                   Neighborhood__MeadowV  -9615.6311   13100.000 -0.737  0.462  -35200.000   16000.000          
282                   Neighborhood__Mitchel -14520.0000    5634.512 -2.577  0.010  -25600.000   -3461.588         *
283                     Neighborhood__NAmes -10310.0000    3703.522 -2.783  0.006  -17600.000   -3036.281        **
284                    Neighborhood__NWAmes  -7765.5963    5043.373 -1.540  0.124  -17700.000    2133.739          
285                   Neighborhood__NoRidge  34920.0000    6782.590  5.148  0.000   21600.000   48200.000       ***
286                   Neighborhood__NridgHt  25730.0000    5554.711  4.632  0.000   14800.000   36600.000       ***
287                   Neighborhood__OldTown -13410.0000    6496.737 -2.064  0.039  -26200.000    -656.014         *
288                Neighborhood__Rare cases  11810.0000   12600.000  0.940  0.347  -12800.000   36500.000          
289                     Neighborhood__SWISU -24620.0000    8976.734 -2.743  0.006  -42200.000   -6999.900        **
290                    Neighborhood__Sawyer  -6548.3699    4699.991 -1.393  0.164  -15800.000    2676.961          
291                   Neighborhood__SawyerW  -1972.7708    4862.190 -0.406  0.685  -11500.000    7570.931          
292                   Neighborhood__Somerst   -938.1490    8148.718 -0.115  0.908  -16900.000   15100.000          
293                   Neighborhood__StoneBr  43120.0000    7651.029  5.636  0.000   28100.000   58100.000       ***
294                    Neighborhood__Timber -11260.0000    6507.294 -1.730  0.084  -24000.000    1516.271         .
295                   Neighborhood__Veenker  14020.0000    9555.638  1.467  0.143   -4739.385   32800.000          
296                             OpenPorchSF     32.5969      17.460  1.867  0.062      -1.675      66.869         .
297                             OverallCond   2023.2101    4604.864  0.439  0.661   -7015.402   11100.000          
298                  OverallCond_str__"3.0"  -8761.9856   14600.000 -0.602  0.548  -37300.000   19800.000          
299                  OverallCond_str__"4.0"  -6688.1319   10300.000 -0.652  0.515  -26800.000   13400.000          
300                  OverallCond_str__"5.0"  -2302.9238    5764.646 -0.399  0.690  -13600.000    9012.154          
301                  OverallCond_str__"6.0"   1747.6884    2858.778  0.611  0.541   -3863.636    7359.013          
302                  OverallCond_str__"7.0"   7716.3977    5290.143  1.459  0.145   -2667.306   18100.000          
303                  OverallCond_str__"8.0"   1640.2865   10100.000  0.163  0.871  -18100.000   21400.000          
304                  OverallCond_str__"9.0"   6649.0521   15500.000  0.428  0.669  -23900.000   37200.000          
305                             OverallQual   7782.7842    6578.222  1.183  0.237   -5129.214   20700.000          
306                 OverallQual_str__"10.0"  14430.0000   25100.000  0.575  0.565  -34800.000   63700.000          
307                  OverallQual_str__"3.0"    795.1478   23300.000  0.034  0.973  -45000.000   46600.000          
308                  OverallQual_str__"4.0"  -3258.6122   16700.000 -0.196  0.845  -36000.000   29500.000          
309                  OverallQual_str__"5.0"  -8955.9951   10900.000 -0.825  0.410  -30300.000   12400.000          
310                  OverallQual_str__"6.0" -11320.0000    4852.299 -2.333  0.020  -20800.000   -1797.186         *
311                  OverallQual_str__"7.0" -11790.0000    4441.861 -2.654  0.008  -20500.000   -3070.359        **
312                  OverallQual_str__"8.0"  -4242.6962   10100.000 -0.419  0.675  -24100.000   15600.000          
313                  OverallQual_str__"9.0"  24350.0000   17600.000  1.381  0.168  -10300.000   59000.000          
314                           PavedDrive__N   -204.0493    3672.084 -0.056  0.956   -7411.763    7003.664          
315                           PavedDrive__P    125.0322    4647.113  0.027  0.979   -8996.508    9246.572          
316                           PavedDrive__Y     79.4006    2987.590  0.027  0.979   -5784.761    5943.562          
317                                PoolArea    -28.6800      24.519 -1.170  0.242     -76.807      19.446          
318                              PoolQC__Ex    804.5380    2509.551  0.321  0.749   -4121.310    5730.386          
319                              PoolQC__Fa    944.2044    2117.761  0.446  0.656   -3212.622    5101.031          
320                              PoolQC__Gd  -1748.3590    1981.536 -0.882  0.378   -5637.797    2141.079          
321                       RoofMatl__CompShg  -3657.8301   10400.000 -0.353  0.724  -24000.000   16700.000          
322                    RoofMatl__Rare cases   7365.9624    9714.479  0.758  0.449  -11700.000   26400.000          
323                       RoofMatl__Tar&Grv  -3707.7489   17000.000 -0.217  0.828  -37200.000   29800.000          
324                         RoofStyle__Flat  -1387.7548   19400.000 -0.071  0.943  -39500.000   36700.000          
325                        RoofStyle__Gable  -5263.7560    6084.342 -0.865  0.387  -17200.000    6678.835          
326                      RoofStyle__Gambrel  -4020.5639   10700.000 -0.375  0.708  -25100.000   17000.000          
327                          RoofStyle__Hip  -5775.6298    6358.011 -0.908  0.364  -18300.000    6704.129          
328                   RoofStyle__Rare cases  16450.0000   11500.000  1.434  0.152   -6073.114   39000.000          
329                  SaleCondition__Abnorml  -6090.0190    7035.650 -0.866  0.387  -19900.000    7719.837          
330                   SaleCondition__Alloca   4526.0453   12600.000  0.358  0.720  -20300.000   29300.000          
331                   SaleCondition__Family -11660.0000    8874.699 -1.314  0.189  -29100.000    5756.829          
332                   SaleCondition__Normal   -170.0886    6524.633 -0.026  0.979  -13000.000   12600.000          
333                  SaleCondition__Partial  13400.0000   22800.000  0.588  0.557  -31400.000   58200.000          
334                           SaleType__COD  -1857.5962    8478.234 -0.219  0.827  -18500.000   14800.000          
335                           SaleType__New  -5453.7123   21600.000 -0.253  0.800  -47800.000   36900.000          
336                    SaleType__Rare cases   8174.6465    8940.149  0.914  0.361   -9373.437   25700.000          
337                            SaleType__WD   -862.9546    7526.746 -0.115  0.909  -15600.000   13900.000          
338                             ScreenPorch     50.4601      17.299  2.917  0.004      16.505      84.415        **
339                            TotRmsAbvGrd   1821.4926    5343.766  0.341  0.733   -8667.466   12300.000          
340                TotRmsAbvGrd_str__"10.0"  15580.0000   13900.000  1.118  0.264  -11800.000   42900.000          
341                TotRmsAbvGrd_str__"11.0" -29510.0000   19800.000 -1.489  0.137  -68400.000    9382.096          
342                TotRmsAbvGrd_str__"12.0" -12830.0000   24200.000 -0.531  0.595  -60200.000   34600.000          
343                 TotRmsAbvGrd_str__"3.0"  -1891.5622   24200.000 -0.078  0.938  -49400.000   45600.000          
344                 TotRmsAbvGrd_str__"4.0"   3737.4070   18300.000  0.205  0.838  -32100.000   39600.000          
345                 TotRmsAbvGrd_str__"5.0"   5081.3684   12900.000  0.394  0.694  -20300.000   30400.000          
346                 TotRmsAbvGrd_str__"6.0"   4094.6722    7986.530  0.513  0.608  -11600.000   19800.000          
347                 TotRmsAbvGrd_str__"7.0"   4131.7814    3837.773  1.077  0.282   -3401.153   11700.000          
348                 TotRmsAbvGrd_str__"8.0"   4649.8681    4552.791  1.021  0.307   -4286.533   13600.000          
349                 TotRmsAbvGrd_str__"9.0"   6963.4045    9007.686  0.773  0.440  -10700.000   24600.000          
350                             TotalBsmtSF      2.5216       6.409  0.393  0.694     -10.059      15.102          
351                              WoodDeckSF     19.2837       8.125  2.373  0.018       3.336      35.232         *
352                               YearBuilt    155.1413     118.273  1.312  0.190     -77.009     387.292          
353                            YearRemodAdd     89.5473      78.454  1.141  0.254     -64.445     243.540          
354                                  YrSold   -235.6229     153.221 -1.538  0.124    -536.372      65.126          
355                    YrSold_str__"2006.0"   1434.5202    1856.169  0.773  0.440   -2208.842    5077.883          
356                    YrSold_str__"2007.0"  -2724.1054    1748.398 -1.558  0.120   -6155.932     707.721          
357                    YrSold_str__"2008.0"    371.3292    1767.629  0.210  0.834   -3098.244    3840.903          
358                    YrSold_str__"2009.0"   2697.8848    1705.531  1.582  0.114    -649.799    6045.569          
359                    YrSold_str__"2010.0"  -1779.2455    2374.509 -0.749  0.454   -6440.028    2881.537          

-------------------------

 --- Model statistic --- 

R-squared         : 0.918
Adj. R-squared    : 0.892
F-statistic       : 35
Prob (F-statistic): 0.0
No. Observations  : 1095
AIC               : 25567
Df Residuals      : 826
BIC               : 26911
RMSE (test)       : 32029

-------------------------

Maximum correlation between Reseduals and any data columns is 9.164997151410843e-13, with columns <LotShape__IR2>
Mean of train reseduals: -1.071393064576197e-08

 ------------------------------------- Random Forest -------------------------------------


-------------------------

RF model peramters:

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': True,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

-------------------------

-------------------------

--- Model statistic ---
R^2 (test) : 0.8761020925815912
R^2 (train): 0.9811453430304609
RMSE (test): 29458
oob score  : 0.85891384774956

-------------------------

Maximum correlation between Reseduals and any data columns is 0.3752565328359765, with columns <PoolArea>